-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsites.go
More file actions
32 lines (29 loc) · 1.21 KB
/
sites.go
File metadata and controls
32 lines (29 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package main
import "strings"
// siteSelectors maps domain substrings to CSS selectors for site-specific
// content extraction. Keys are lowercase and are matched by siteSelector as
// substrings of the domain, so a key does not have to be a complete host name
// (for example "mkdocs" or "confluence").
var siteSelectors = map[string]string{
	"baeldung.com":          ".post-content, article.baeldung-article",
	"docusaurus.io":         ".theme-doc-markdown, article[role=\"main\"]",
	"gitbook.io":            ".page-body .page-inner",
	"readthedocs.io":        "[role=\"main\"], .rst-content",
	"readthedocs.org":       "[role=\"main\"], .rst-content",
	"mkdocs":                ".md-content",
	"spring.io":             ".content, main",
	"github.com":            ".markdown-body",
	"developer.mozilla.org": ".main-page-content, article",
	"pkg.go.dev":            ".Documentation",
	"stackoverflow.com":     ".answercell .js-post-body",
	"medium.com":            "article section",
	"dev.to":                ".crayons-article__body",
	"confluence":            ".wiki-content, #main-content",
	"notion.so":             ".notion-page-content",
}

// siteSelector returns the CSS selector registered for domain and reports
// whether any key of siteSelectors matched.
//
// A key matches when it occurs anywhere in the lowercased domain; host names
// are case-insensitive and every key is lowercase. More than one key can match
// the same domain (e.g. "mkdocs.readthedocs.io" contains both "mkdocs" and
// "readthedocs.io"). Go randomizes map iteration order, so returning the first
// hit would make the result vary from call to call. Instead the longest
// matching key wins, and equal-length matches are resolved by picking the
// lexicographically smaller key, which makes the result deterministic.
//
// When nothing matches, it returns "" and false.
func siteSelector(domain string) (string, bool) {
	host := strings.ToLower(domain)

	best := ""
	found := false
	for key := range siteSelectors {
		if !strings.Contains(host, key) {
			continue
		}
		// Prefer the most specific (longest) key; break ties by key order so
		// the outcome never depends on map iteration order.
		if !found || len(key) > len(best) || (len(key) == len(best) && key < best) {
			best, found = key, true
		}
	}
	if !found {
		return "", false
	}
	return siteSelectors[best], true
}