use of org.marc4j.marc.DataField in project RecordManager2 by moravianlibrary.
the class PatentsXmlStreamReader method next.
/**
 * Returns the next record in the iteration.
 *
 * <p>Consumes XML stream events until the end tag of a record element
 * ({@code ELEMENT_RECORD_EP} / {@code ELEMENT_RECORD_CZ}) is reached, filling
 * the {@code record} field along the way. After the record's end tag the
 * reader is advanced to the next start element (or to the end of the stream)
 * before returning.
 *
 * <p>Missing {@code lang} / {@code sequence} attributes are tolerated: a title
 * without a language is handled like any non-Czech title (field 246), an
 * abstract without a language is not treated as Czech, and a classification
 * without a sequence is not treated as the first one.
 *
 * <p>{@link XMLStreamException} and {@link IOException} are caught and printed;
 * in that case whatever has been built so far is returned.
 *
 * @return Record - the record object, passed through
 *         {@code RecordUtils.sortFields}
 */
public Record next() {
    record = null;
    DataField df = null;
    String name = null;
    // first to field 100, others 700
    boolean firstAuthor = true;
    boolean author = true;
    boolean authorAvailable = true;
    // first to field 110, others 710
    boolean firstCorporate = true;
    boolean b072 = false;
    boolean abstratcs = false;
    boolean appl_reference = false;
    boolean public_reference = false;
    // 'p' inside a publication reference, 'a' inside an application reference
    char date = ' ';
    String patentType = "";
    String dateStr = "";
    try {
        while (xmlReader.hasNext()) {
            switch (xmlReader.getEventType()) {
            case XMLStreamReader.START_ELEMENT:
                switch (xmlReader.getLocalName()) {
                case ELEMENT_RECORD_EP:
                case ELEMENT_RECORD_CZ:
                    record = factory.newRecord();
                    patentType = addIdentifier();
                    addFields(patentType);
                    addUrl();
                    addDocNumber();
                    break;
                case ELEMENT_APPLICANT:
                case ELEMENT_INVENTOR:
                case ELEMENT_AGENT:
                    // personal name unless an orgname element follows
                    author = true;
                    break;
                case ELEMENT_ORGNAME:
                    name = xmlReader.getElementText();
                    author = false;
                    break;
                case ELEMENT_FIRST_NAME:
                    if (name == null) {
                        name = xmlReader.getElementText();
                    } else {
                        name += ", " + xmlReader.getElementText();
                    }
                    break;
                case ELEMENT_LAST_NAME:
                    if (name == null) {
                        name = xmlReader.getElementText();
                    } else {
                        name = xmlReader.getElementText() + ", " + name;
                    }
                    break;
                case ELEMENT_INVENTION_TITLE:
                    df = factory.newDataField("TMP", ' ', ' ');
                    // the attribute has to be read before getElementText() moves the cursor
                    String titleLang = xmlReader.getAttributeValue(null, ATTRIBUTE_LANG);
                    df.addSubfield(factory.newSubfield('a', xmlReader.getElementText()));
                    // constant-first comparison: getAttributeValue returns null when the attribute is absent
                    if ("cs".equalsIgnoreCase(titleLang)) {
                        df.setTag("245");
                        if (!authorAvailable) {
                            df.addSubfield(factory.newSubfield('c', AUTHOR_NOT_AVAILABLE));
                        }
                    } else {
                        df.setTag("246");
                        df.setIndicator1('2');
                    }
                    record.addVariableField(df);
                    break;
                case ELEMENT_ABSTRACT:
                    if ("cs".equalsIgnoreCase(xmlReader.getAttributeValue(null, ATTRIBUTE_LANG))) {
                        abstratcs = true;
                    }
                    break;
                case ELEMENT_CLASSIFICATION_IPCR:
                    if ("1".equals(xmlReader.getAttributeValue(null, ATTRIBUTE_SEQUENCE))) {
                        df = factory.newDataField("653", ' ', ' ');
                        b072 = true;
                    } else {
                        df = null;
                    }
                    break;
                case ELEMENT_TEXT:
                    String data = xmlReader.getElementText();
                    if (b072 && df != null && data.length() >= 4) {
                        String prefix = data.substring(0, 4);
                        List<String> mapped = propertyResolver.resolve(PATENTS_MAP).get(prefix);
                        if (mapped != null && !mapped.isEmpty()) {
                            // mapping value is expected in the form "<653 $a text>|<072 value>"
                            String[] parts = mapped.get(0).split("\\|");
                            if (parts.length == 2) {
                                df.addSubfield(factory.newSubfield('a', parts[0]));
                                record.addVariableField(createField072(parts[1]));
                            }
                        }
                        b072 = false;
                    }
                    addField024(data);
                    break;
                case ELEMENT_P:
                    if (abstratcs) {
                        df = factory.newDataField("520", '3', ' ');
                        df.addSubfield(factory.newSubfield('a', getText().trim()));
                        record.addVariableField(df);
                        abstratcs = false;
                    }
                    break;
                case ELEMENT_PUBLICATION_REFERENCE:
                    public_reference = true;
                    date = 'p';
                    break;
                case ELEMENT_APPLICATION_REFERENCE:
                    appl_reference = true;
                    date = 'a';
                    break;
                case ELEMENT_DATE:
                    dateStr = xmlReader.getElementText();
                    if (date == 'p') {
                        addField008(dateStr);
                        addField260(dateStr);
                        switch (patentType) {
                        case PATENT_TYPE_A3:
                            addField500aDate(TEXT_500a_PUBLICATION_A3, dateStr);
                            break;
                        case PATENT_TYPE_B6:
                            addField500aDate(TEXT_500a_PUBLICATION_B6, dateStr);
                            break;
                        case PATENT_TYPE_U1:
                            addField500aDate(TEXT_500a_PUBLICATION_U1, dateStr);
                            break;
                        default:
                            break;
                        }
                    } else if (date == 'a') {
                        addField500aDate(TEXT_500a_APPLICATION, dateStr);
                    }
                    // only the first date inside a reference is handled
                    date = ' ';
                    break;
                case ELEMENT_DOC_NUMBER:
                    String docNumber = xmlReader.getElementText();
                    if (appl_reference) {
                        add500aApplDocNumber(docNumber, patentType);
                    } else if (public_reference) {
                        addField013(docNumber, patentType, dateStr);
                    }
                    break;
                }
                break;
            case XMLStreamReader.END_ELEMENT:
                switch (xmlReader.getLocalName()) {
                case ELEMENT_APPLICANT:
                    if (isAuthorAvailable(name)) {
                        addAuthor(author, firstAuthor, firstCorporate, name, "pta");
                        if (author) {
                            firstAuthor = false;
                        } else {
                            firstCorporate = false;
                        }
                    } else {
                        authorAvailable = false;
                    }
                    name = null;
                    break;
                case ELEMENT_INVENTOR:
                    if (isAuthorAvailable(name)) {
                        addAuthor(author, firstAuthor, firstCorporate, name, "inv");
                        if (author) {
                            firstAuthor = false;
                        } else {
                            firstCorporate = false;
                        }
                    } else {
                        authorAvailable = false;
                    }
                    name = null;
                    break;
                case ELEMENT_AGENT:
                    if (isAuthorAvailable(name)) {
                        addAuthor(author, firstAuthor, firstCorporate, name, "pth");
                        if (author) {
                            firstAuthor = false;
                        } else {
                            firstCorporate = false;
                        }
                    } else {
                        authorAvailable = false;
                    }
                    name = null;
                    break;
                case ELEMENT_CLASSIFICATION_IPCR:
                    if (df != null) {
                        record.addVariableField(df);
                    }
                    break;
                case ELEMENT_RECORD_EP:
                case ELEMENT_RECORD_CZ:
                    // position the reader on the next start element so the following call starts there
                    while (xmlReader.hasNext() && xmlReader.getEventType() != XMLStreamReader.START_ELEMENT) {
                        xmlReader.next();
                    }
                    return RecordUtils.sortFields(record);
                case ELEMENT_APPLICATION_REFERENCE:
                    appl_reference = false;
                    break;
                case ELEMENT_PUBLICATION_REFERENCE:
                    public_reference = false;
                    break;
                }
                break;
            }
            xmlReader.next();
        }
    } catch (XMLStreamException e) {
        // NOTE(review): best-effort behaviour kept; consider routing this through the project's logger
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return RecordUtils.sortFields(record);
}
use of org.marc4j.marc.DataField in project RecordManager2 by moravianlibrary.
the class PatentsXmlStreamReader method addField260.
/**
 * Appends a 260 field to the current record.
 *
 * <p>Subfields a and b carry the fixed texts {@code TEXT_260a} and
 * {@code TEXT_260b}; subfield c carries the first four characters of
 * {@code date} (presumably the year). Nothing is added when {@code date} is
 * {@code null} or shorter than four characters.
 *
 * @param date date string whose first four characters go to subfield c
 */
private void addField260(String date) {
    if (date == null || date.length() < 4) {
        return;
    }
    DataField field260 = factory.newDataField("260", ' ', ' ');
    field260.addSubfield(factory.newSubfield('a', TEXT_260a));
    field260.addSubfield(factory.newSubfield('b', TEXT_260b));
    String leadingFour = date.substring(0, 4);
    field260.addSubfield(factory.newSubfield('c', leadingFour));
    record.addVariableField(field260);
}
use of org.marc4j.marc.DataField in project RecordManager2 by moravianlibrary.
the class PatentsXmlStreamReader method addUrl.
/**
 * Derives a document URL from the record's control number and, when one could
 * be built, appends it as field 856 (indicator 1 = '4') with the URL in
 * subfield u and {@code TEXT_856y} in subfield y.
 *
 * <p>The control number is tried against {@code A3_PATTERN},
 * {@code B6_PATTERN} and {@code U1_PATTERN} in that order; every matching
 * pattern overwrites the URL, so the last match wins.
 */
private void addUrl() {
    final String controlNumber = record.getControlNumber();
    String link = null;

    Matcher a3 = A3_PATTERN.matcher(controlNumber);
    if (a3.matches()) {
        String first = a3.group(1);
        String second = StringUtils.leftPad(a3.group(2), 4, '0');
        link = String.format(A3_URL, first, first, second);
    }

    Matcher b6 = B6_PATTERN.matcher(controlNumber);
    if (b6.matches()) {
        String number = b6.group(1);
        // directory is the number without its last three characters, "0" for short numbers
        String dir = number.length() <= 3
                ? "0"
                : number.substring(0, number.length() - 3);
        link = String.format(B6_URL, dir, number);
    }

    Matcher u1 = U1_PATTERN.matcher(controlNumber);
    if (u1.matches()) {
        String number = u1.group(1);
        // same directory rule as above, but left-padded with zeros to four characters
        String dir = number.length() <= 3
                ? "0000"
                : StringUtils.leftPad(number.substring(0, number.length() - 3), 4, '0');
        link = String.format(U1_URL, dir, StringUtils.leftPad(number, 6, '0'));
    }

    if (link == null) {
        return;
    }
    record.addVariableField(factory.newDataField("856", '4', ' ', "u", link, "y", TEXT_856y));
}
use of org.marc4j.marc.DataField in project RecordManager2 by moravianlibrary.
the class MetadataMarcRecord method getPublisherNumber.
/**
 * Collects publisher numbers from the 028 fields of the underlying MARC record.
 *
 * <p>Only fields with first indicator '0' and a subfield a are used. The
 * subfield value is lower-cased and everything matched by
 * {@code PUBLISHER_NUMBER_PATTERN} is removed. Each result is numbered in
 * encounter order, starting at 1.
 *
 * @return list of publisher numbers, empty when no 028 field qualifies
 */
@Override
public List<PublisherNumber> getPublisherNumber() {
    List<PublisherNumber> results = new ArrayList<>();
    // primitive counter: avoids unboxing/reboxing a Long on every increment
    long order = 0L;
    for (DataField df : underlayingMarc.getDataFields("028")) {
        if (df.getIndicator1() == '0' && df.getSubfield('a') != null) {
            // Locale.ROOT keeps the normalisation independent of the JVM default locale
            // (e.g. Turkish would turn 'I' into dotless 'ı')
            String lowered = df.getSubfield('a').getData().toLowerCase(java.util.Locale.ROOT);
            String result = PUBLISHER_NUMBER_PATTERN.matcher(lowered).replaceAll("");
            results.add(new PublisherNumber(result, ++order));
        }
    }
    return results;
}
use of org.marc4j.marc.DataField in project RecordManager2 by moravianlibrary.
the class MetadataMarcRecord method getLanguages.
/**
 * Returns the distinct language codes of the underlying MARC record.
 *
 * <p>Every subfield a of every 041 field contributes "cze" or "eng" when its
 * lower-cased value equals one of those, and "oth" otherwise. Only when no
 * 041 $a is present, characters 35-37 of control field 008 are consulted
 * (the field must be longer than 39 characters); they contribute "cze" or
 * "eng" and are ignored for any other value.
 *
 * @return list of distinct codes in no particular order, possibly empty
 */
@Override
public List<String> getLanguages() {
    Set<String> languages = new HashSet<>();

    for (DataField field041 : underlayingMarc.getDataFields("041")) {
        for (Subfield subfieldA : field041.getSubfields('a')) {
            String lowered = subfieldA.getData().toLowerCase();
            switch (lowered) {
            case "cze":
            case "eng":
                languages.add(lowered);
                break;
            default:
                languages.add("oth");
                break;
            }
        }
    }

    if (languages.isEmpty()) {
        // fall back to the language position of control field 008
        String field008 = underlayingMarc.getControlField("008");
        if (field008 != null && field008.length() > 39) {
            String fromControlField = field008.substring(35, 38).toLowerCase();
            if ("cze".equals(fromControlField) || "eng".equals(fromControlField)) {
                languages.add(fromControlField);
            }
        }
    }

    return new ArrayList<String>(languages);
}
Aggregations