use of structures._NewEggPost in project IR_Base by Linda-sunshine.
the class HTSMAnalyzer method AnalyzeNewEggPostWithSentence.
/**
 * Analyzes one NewEgg review with sentence splitting and, when it is large enough,
 * stores it in the corpus as a {@code _Doc}.
 *
 * <p>{@code m_prosConsLoad} decides which of the pros / cons / comments sections are
 * read; a section whose getter returns {@code null} is skipped. Every loaded section
 * is handed to {@code analyzeSectionWithStnSplit} with the section id 0 (pros),
 * 1 (cons) or -1 (comments). Unless {@code m_releaseContent} is set, the raw text of
 * the loaded sections is also kept and passed to the document as its source.
 *
 * @param post the review to analyze
 * @return {@code true} if a document was created and added to {@code m_corpus};
 *         {@code false} if the review did not reach {@code m_lengthThreshold} or
 *         {@code m_stnSizeThreshold}
 * @throws ParseException if the review date cannot be parsed by {@code m_dateFormatter}
 */
protected boolean AnalyzeNewEggPostWithSentence(_NewEggPost post) throws ParseException {
    // Sentences collected over all loaded sections.
    ArrayList<_Stn> sentences = new ArrayList<_Stn>();
    // Feature index -> value maps filled by analyzeSectionWithStnSplit.
    ArrayList<HashMap<Integer, Double>> sectionVectors = new ArrayList<HashMap<Integer, Double>>();
    // Document-level map; the original comment says it is used for DF statistics.
    HashMap<Integer, Double> dfVector = new HashMap<Integer, Double>();

    // The raw text is only accumulated when the content is not released.
    StringBuffer rawText = null;
    if (!m_releaseContent) {
        rawText = new StringBuffer(256);
    }

    final int label = post.getLabel() - 1;
    int prosStnCount = 0;
    int consStnCount = 0;

    // Pros section (section id 0).
    boolean loadPros = m_prosConsLoad == LoadType.LT_all
            || m_prosConsLoad == LoadType.LT_procon
            || m_prosConsLoad == LoadType.LT_pros;
    if (loadPros) {
        String pros = post.getProContent();
        if (pros != null) {
            prosStnCount = analyzeSectionWithStnSplit(pros, label, 0, dfVector, sectionVectors, sentences);
            if (rawText != null) {
                rawText.append(String.format("Pros: %s\n", pros));
            }
        }
    }

    // Cons section (section id 1).
    boolean loadCons = m_prosConsLoad == LoadType.LT_all
            || m_prosConsLoad == LoadType.LT_procon
            || m_prosConsLoad == LoadType.LT_cons;
    if (loadCons) {
        String cons = post.getConContent();
        if (cons != null) {
            consStnCount = analyzeSectionWithStnSplit(cons, label, 1, dfVector, sectionVectors, sentences);
            if (rawText != null) {
                rawText.append(String.format("Cons: %s\n", cons));
            }
        }
    }

    // Free-text comments (section id -1); the returned value is not tracked for this section.
    boolean loadComments = m_prosConsLoad == LoadType.LT_all
            || m_prosConsLoad == LoadType.LT_comments;
    if (loadComments) {
        String comments = post.getComments();
        if (comments != null) {
            analyzeSectionWithStnSplit(comments, label, -1, dfVector, sectionVectors, sentences);
            if (rawText != null) {
                rawText.append(String.format("Comments: %s\n", comments));
            }
        }
    }

    // Reject reviews with too few distinct entries or too few sentences.
    boolean largeEnough = dfVector.size() >= m_lengthThreshold && sentences.size() >= m_stnSizeThreshold;
    if (!largeEnough) {
        return false;
    }

    long timeStamp = m_dateFormatter.parse(post.getDate()).getTime();
    String source = (rawText == null) ? null : rawText.toString();

    // _Doc(int ID, String name, String prodID, String title, String source, int ylabel, long timeStamp)
    _Doc doc = new _Doc(m_corpus.getSize(), post.getID(), post.getProdId(), post.getTitle(), source, label, timeStamp);
    doc.setSourceType(2); // source type 2 marks a NewEgg document
    doc.createSpVct(sectionVectors);
    doc.setSentences(sentences);
    setStnFvs(doc); // NOTE(review): presumably derives per-sentence features - confirm in setStnFvs
    m_corpus.addDoc(doc);

    // Bookkeeping is only updated for reviews that were actually added.
    m_classMemberNo[label]++;
    m_prosStnCount += prosStnCount;
    m_consStnCount += consStnCount;
    return true;
}
use of structures._NewEggPost in project IR_Base by Linda-sunshine.
the class HTSMAnalyzer method AnalyzeNewEggPost.
/**
 * Analyzes one NewEgg review without sentence splitting and, when it is large enough,
 * stores it in the corpus as a {@code _Doc}.
 *
 * <p>{@code m_prosConsLoad} decides which of the pros / cons / comments sections are
 * read; a section whose getter returns {@code null} is skipped. Every loaded section
 * is handed to {@code analyzeSection}. Unless {@code m_releaseContent} is set, the raw
 * text of the loaded sections is also kept and passed to the document as its source.
 *
 * @param post the review to analyze
 * @return {@code true} if a document was created and added to {@code m_corpus};
 *         {@code false} if the review did not reach {@code m_lengthThreshold}
 * @throws ParseException if the review date cannot be parsed by {@code m_dateFormatter}
 */
protected boolean AnalyzeNewEggPost(_NewEggPost post) throws ParseException {
    // The raw text is only accumulated when the content is not released.
    StringBuffer rawText = null;
    if (!m_releaseContent) {
        rawText = new StringBuffer(256);
    }

    // Document-level map; the original comment says it is used to collect DF.
    HashMap<Integer, Double> dfVector = new HashMap<Integer, Double>();
    // Feature index -> value maps filled by analyzeSection.
    ArrayList<HashMap<Integer, Double>> sectionVectors = new ArrayList<HashMap<Integer, Double>>();

    final int label = post.getLabel() - 1;

    // Pros section.
    boolean loadPros = m_prosConsLoad == LoadType.LT_pros
            || m_prosConsLoad == LoadType.LT_procon
            || m_prosConsLoad == LoadType.LT_all;
    if (loadPros) {
        String pros = post.getProContent();
        if (pros != null) {
            analyzeSection(pros, label, dfVector, sectionVectors);
            if (rawText != null) {
                rawText.append(String.format("Pros: %s\n", pros));
            }
        }
    }

    // Cons section.
    boolean loadCons = m_prosConsLoad == LoadType.LT_cons
            || m_prosConsLoad == LoadType.LT_procon
            || m_prosConsLoad == LoadType.LT_all;
    if (loadCons) {
        String cons = post.getConContent();
        if (cons != null) {
            analyzeSection(cons, label, dfVector, sectionVectors);
            if (rawText != null) {
                rawText.append(String.format("Cons: %s\n", cons));
            }
        }
    }

    // Free-text comments.
    boolean loadComments = m_prosConsLoad == LoadType.LT_comments
            || m_prosConsLoad == LoadType.LT_all;
    if (loadComments) {
        String comments = post.getComments();
        if (comments != null) {
            analyzeSection(comments, label, dfVector, sectionVectors);
            if (rawText != null) {
                rawText.append(String.format("Comments: %s\n", comments));
            }
        }
    }

    // Reject reviews with too few distinct entries.
    boolean largeEnough = dfVector.size() >= m_lengthThreshold;
    if (!largeEnough) {
        return false;
    }

    long timeStamp = m_dateFormatter.parse(post.getDate()).getTime();
    String source = (rawText == null) ? null : rawText.toString();

    // _Doc(int ID, String name, String prodID, String title, String source, int ylabel, long timeStamp)
    _Doc doc = new _Doc(m_corpus.getSize(), post.getID(), post.getProdId(), post.getTitle(), source, label, timeStamp);
    doc.setSourceType(2); // source type 2 marks a NewEgg document
    doc.createSpVct(sectionVectors);
    m_corpus.addDoc(doc);

    // The per-class counter is only updated for reviews that were actually added.
    m_classMemberNo[label]++;
    return true;
}
use of structures._NewEggPost in project IR_Base by Linda-sunshine.
the class HTSMAnalyzer method LoadNewEggDoc.
// Load a document and analyze it.
/**
 * Loads the reviews stored under {@code m_category} in a JSON file and analyzes each one.
 *
 * <p>The category object maps item ids to arrays of reviews. Each review is wrapped
 * in a {@code _NewEggPost} and sent to {@code AnalyzeNewEggPostWithSentence} when a
 * sentence detector is configured, otherwise to {@code AnalyzeNewEggPost}.
 *
 * <p>Loading is best-effort and never throws:
 * <ul>
 *   <li>if the file or the category cannot be read, {@code 'X'} is printed and the
 *       method returns;</li>
 *   <li>if the review array of an item cannot be read, {@code 'P'} and the stack trace
 *       are printed and that item is skipped;</li>
 *   <li>if a single review is malformed or its date cannot be parsed, only that review
 *       is skipped, so the remaining reviews of the same item are still analyzed.</li>
 * </ul>
 *
 * @param filename path of the JSON file, passed on to {@code LoadJSON}
 */
public void LoadNewEggDoc(String filename) {
    JSONObject prod;
    JSONArray itemIds;
    try {
        JSONObject json = LoadJSON(filename);
        prod = json.getJSONObject(m_category);
        itemIds = prod.names();
        System.out.printf("Under %s category, Number of Items: %d\n", m_category, itemIds.length());
    } catch (Exception e) {
        // Deliberately broad: any failure while opening the file or locating the
        // category is reported with the 'X' marker and ends the load for this file.
        System.out.print('X');
        return;
    }

    for (int i = 0; i < itemIds.length(); i++) {
        String item;
        JSONArray reviews;
        try {
            item = itemIds.getString(i);
            reviews = prod.getJSONArray(item);
        } catch (JSONException e) {
            // Without the review array nothing can be loaded for this item.
            System.out.print('P');
            e.printStackTrace();
            continue;
        }

        for (int j = 0; j < reviews.length(); j++) {
            // Each review is isolated so that one bad record does not discard
            // the reviews that follow it for the same item.
            try {
                _NewEggPost post = new _NewEggPost(reviews.getJSONObject(j), item);
                if (this.m_stnDetector != null)
                    AnalyzeNewEggPostWithSentence(post);
                else
                    AnalyzeNewEggPost(post);
            } catch (JSONException e) {
                System.out.print('P');
                e.printStackTrace();
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }
}
Aggregations