-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathDocumentCluster.java
More file actions
55 lines (47 loc) · 1.34 KB
/
Copy pathDocumentCluster.java
File metadata and controls
55 lines (47 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import java.util.*;
/**
 * A group of documents together with per-stem counts for the cluster.
 *
 * <p>The set of tracked stems never changes size during {@link #merge}: a stem
 * already present has its count increased, while an unseen stem can only enter
 * by replacing the currently least-occurring stem.
 */
public class DocumentCluster {
    /** Documents that belong to this cluster, in the order they were added. */
    private final List<Document> docs;

    /** Count per tracked stem; the key set is what {@link #getTopWords()} exposes. */
    private final Map<WordStem, Double> stemCounts;

    /**
     * Creates a cluster holding a single document.
     *
     * @param doc    the initial document of the cluster
     * @param counts stem counts for the cluster; the map is stored as-is (not
     *               copied), so later calls to {@link #merge} modify it in place
     */
    public DocumentCluster(Document doc, Map<WordStem, Double> counts) {
        docs = new ArrayList<Document>();
        docs.add(doc);
        stemCounts = counts;
    }

    /**
     * Absorbs another cluster into this one.
     *
     * <p>All documents of {@code cluster} are appended to this cluster. For each
     * stem of {@code cluster}: if this cluster already tracks the stem, the two
     * counts are summed; otherwise the stem replaces this cluster's
     * least-occurring stem, but only when its count is strictly greater.
     * The other cluster is not modified.
     *
     * @param cluster the cluster whose documents and stem counts are merged in
     */
    public void merge(DocumentCluster cluster) {
        docs.addAll(cluster.getDocuments());

        for (Map.Entry<WordStem, Double> incoming : cluster.getCounts().entrySet()) {
            WordStem stem = incoming.getKey();
            Double incomingCount = incoming.getValue();

            Double existingCount = stemCounts.get(stem);
            if (existingCount != null) {
                stemCounts.put(stem, existingCount + incomingCount);
                continue;
            }

            // An unseen stem may only displace an existing one. With no tracked
            // stems there is nothing to displace (previously this case threw
            // NoSuchElementException), so the stem is skipped.
            if (stemCounts.isEmpty()) {
                continue;
            }

            WordStem weakest = getLeastOccurringStem();
            if (incomingCount > stemCounts.get(weakest)) {
                stemCounts.put(stem, incomingCount);
                stemCounts.remove(weakest);
            }
        }
    }

    /**
     * Finds the tracked stem with the smallest count. On ties, the stem that
     * comes first in the map's iteration order wins.
     *
     * <p>Must only be called when {@code stemCounts} is non-empty.
     *
     * @return the stem whose count is lowest
     */
    private WordStem getLeastOccurringStem() {
        Iterator<Map.Entry<WordStem, Double>> entries = stemCounts.entrySet().iterator();
        Map.Entry<WordStem, Double> least = entries.next();
        while (entries.hasNext()) {
            Map.Entry<WordStem, Double> candidate = entries.next();
            if (candidate.getValue() < least.getValue()) {
                least = candidate;
            }
        }
        return least.getKey();
    }

    /**
     * @return a read-only view of the documents in this cluster
     */
    public List<Document> getDocuments() {
        return Collections.unmodifiableList(docs);
    }

    /**
     * @return a read-only view of the count stored for each tracked stem
     */
    public Map<WordStem, Double> getCounts() {
        return Collections.unmodifiableMap(stemCounts);
    }

    /**
     * @return a read-only view of the stems currently tracked by this cluster
     */
    public Set<WordStem> getTopWords() {
        return Collections.unmodifiableSet(stemCounts.keySet());
    }
}