-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRunner.java
More file actions
113 lines (95 loc) · 3.61 KB
/
Runner.java
File metadata and controls
113 lines (95 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package webcrawler;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.concurrent.ConcurrentLinkedQueue;
/**
 * Crawl worker that repeatedly takes a link from a queue shared by all
 * {@code Runner} instances, updates the shared per-host and per-link counters,
 * and, for links it has not recorded before, builds a {@link Webpage} and
 * enqueues the links that page reports.
 *
 * <p>All counters, both maps and the work queue are static, so every
 * {@code Runner} in the JVM operates on the same state. The loop runs until
 * {@link #setRunning(boolean) setRunning(false)} is called or the executing
 * thread is interrupted.
 *
 * <p>NOTE(review): the read-then-put updates on {@link #HOSTS} and
 * {@link #KNOWN_LINKS} are not atomic. If several runners execute concurrently,
 * counts may be lost unless {@code ObservableHashMap} provides its own
 * synchronisation - confirm against that class.
 *
 * @author lucas.burdell
 */
public class Runner implements Runnable {

    /** How many times each link has been processed by {@link #run()}. */
    public static final ObservableHashMap<String, Integer> KNOWN_LINKS = new ObservableHashMap<>();

    /** How many processed links belonged to each host. */
    public static final ObservableHashMap<String, Integer> HOSTS = new ObservableHashMap<>();

    /** Work queue shared by every runner. */
    private static final ConcurrentLinkedQueue<String> LINKS_TO_VISIT = new ConcurrentLinkedQueue<>();

    private static final ObservableAtomicInteger NUMBER_OF_SITES_VISITED = new ObservableAtomicInteger();
    private static final ObservableAtomicInteger NUMBER_OF_LINKS_VISITED = new ObservableAtomicInteger();
    private static final ObservableAtomicInteger NUMBER_OF_UNIQUE_SITES = new ObservableAtomicInteger();
    private static final ObservableAtomicInteger NUMBER_OF_SKIPPED_LINKS = new ObservableAtomicInteger();

    /** Pause, in milliseconds, taken when the work queue is empty. */
    private static final long IDLE_SLEEP_MILLIS = 250;

    /**
     * Per-host limit: once a host's count in {@link #HOSTS} reaches this value,
     * further links on that host are skipped. Volatile because it is set via
     * {@link #setCutoffNumber(int)} and read from worker threads.
     */
    private static volatile int CUTOFF_NUMBER = 10;

    /** Loop flag; volatile so a stop request from another thread is seen by {@link #run()}. */
    private volatile boolean running;

    private Webpage startPage;

    /** @return the counter incremented once per link whose host was under the cutoff */
    public static ObservableAtomicInteger getNumberOfWebsitesVisited() {
        return NUMBER_OF_SITES_VISITED;
    }

    /** @return the counter incremented the first time a host is recorded */
    public static ObservableAtomicInteger getNumberOfUniqueWebsites() {
        return NUMBER_OF_UNIQUE_SITES;
    }

    /** @return the counter incremented once per link that was not skipped */
    public static ObservableAtomicInteger getNumberOfLinksVisited() {
        return NUMBER_OF_LINKS_VISITED;
    }

    /** @return the counter incremented once per link dropped because its host hit the cutoff */
    public static ObservableAtomicInteger getNumberOfSkippedLinks() {
        return NUMBER_OF_SKIPPED_LINKS;
    }

    /** @return the current per-host cutoff */
    public static int getCutoffNumber() {
        return CUTOFF_NUMBER;
    }

    /**
     * Sets the per-host cutoff used by all runners.
     *
     * @param cutoff number of links per host after which further links are skipped
     */
    public static void setCutoffNumber(int cutoff) {
        CUTOFF_NUMBER = cutoff;
    }

    /** Creates a runner that works on whatever is already in the shared queue. */
    public Runner() {
        running = true;
    }

    /**
     * Creates a runner and seeds the shared queue with the links of the given page.
     *
     * @param startPage page whose {@code getLinks()} result is added to the queue;
     *                  must not be {@code null}
     */
    public Runner(Webpage startPage) {
        this.startPage = startPage;
        LINKS_TO_VISIT.addAll(startPage.getLinks());
        running = true;
    }

    /**
     * Processes queued links until stopped. When the queue is empty the thread
     * sleeps briefly and retries; an interrupt during that sleep ends the loop.
     */
    @Override
    public void run() {
        while (running) {
            // poll() returns null on an empty queue, so there is no window between
            // an emptiness check and the removal in which another worker can win.
            String link = LINKS_TO_VISIT.poll();
            if (link == null) {
                if (!idle()) {
                    running = false;
                    return;
                }
                continue;
            }
            if (!recordHost(link)) {
                continue;
            }
            recordLink(link);
        }
    }

    /**
     * Sleeps for {@link #IDLE_SLEEP_MILLIS}.
     *
     * @return {@code true} if the sleep completed, {@code false} if the thread was
     *         interrupted (the interrupt flag is restored for the caller)
     */
    private static boolean idle() {
        try {
            Thread.sleep(IDLE_SLEEP_MILLIS);
            return true;
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Updates the per-host statistics for a link.
     *
     * @param link the link taken from the queue
     * @return {@code false} if the link's host has reached the cutoff and the link
     *         must be skipped; {@code true} otherwise
     */
    private static boolean recordHost(String link) {
        String host;
        try {
            host = new URL(link).getHost();
        } catch (MalformedURLException ignored) {
            // A link that does not parse has no host to account for; it is still
            // counted and crawled by the caller, without touching host statistics.
            return true;
        }

        Integer seen = HOSTS.get(host);
        if (seen != null && seen >= CUTOFF_NUMBER) {
            NUMBER_OF_SKIPPED_LINKS.add(1);
            return false;
        }
        if (seen != null) {
            HOSTS.put(host, seen + 1);
        } else {
            NUMBER_OF_UNIQUE_SITES.add(1);
            HOSTS.put(host, 1);
        }
        NUMBER_OF_SITES_VISITED.add(1);
        return true;
    }

    /**
     * Counts the link and, if it has not been recorded before, builds its
     * {@link Webpage} and enqueues the links that page reports.
     *
     * @param link the link taken from the queue
     */
    private static void recordLink(String link) {
        NUMBER_OF_LINKS_VISITED.add(1);
        Integer hits = KNOWN_LINKS.get(link);
        if (hits != null) {
            KNOWN_LINKS.put(link, hits + 1);
        } else {
            KNOWN_LINKS.put(link, 1);
            Webpage page = new Webpage(link);
            LINKS_TO_VISIT.addAll(page.getLinks());
        }
    }

    /** @return {@code true} while the loop in {@link #run()} is allowed to continue */
    public boolean isRunning() {
        return running;
    }

    /**
     * Requests that the loop continue or stop. A stop takes effect after the
     * link currently being processed, or the current idle sleep, finishes.
     *
     * @param running {@code false} to stop the loop
     */
    public void setRunning(boolean running) {
        this.running = running;
    }
}