-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
98 lines (83 loc) · 3.67 KB
/
Copy pathmain.py
File metadata and controls
98 lines (83 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import re
import argparse
import os
def fetch_js_files(base_url, visited_urls=set(), download=False, find_endpoints=False):
if base_url in visited_urls:
return
print(f"Visiting: {base_url}")
visited_urls.add(base_url)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
try:
response = requests.get(base_url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
with open('javascript_files.txt', 'a') as js_file, open('endpoints.txt', 'a') as endpoint_file:
script_tags = soup.find_all('script')
for tag in script_tags:
src = tag.get('src')
if src and src.endswith('.js'):
src = urljoin(base_url, src)
print(f"Found JavaScript: {src}")
js_file.write(src + '\n')
if download:
js_content = download_js_file(src, headers)
if find_endpoints and js_content:
endpoints = extract_endpoints(js_content, src) # Pass the JS file URL as base_url
for endpoint in endpoints:
endpoint_file.write(endpoint + '\n')
for link in soup.find_all('a'):
href = link.get('href')
if href and (href.startswith('http') or href.startswith('/')):
full_url = urljoin(base_url, href)
if urlparse(full_url).netloc == urlparse(base_url).netloc:
fetch_js_files(full_url, visited_urls, download, find_endpoints)
except requests.RequestException as e:
print(f"Error fetching {base_url}: {e}")
def download_js_file(url, headers):
try:
response = requests.get(url, headers=headers)
file_name = url.split('/')[-1]
with open(file_name, 'w') as file:
file.write(response.text)
print(f"Downloaded {file_name}")
return response.text
except requests.RequestException as e:
print(f"Error downloading {url}: {e}")
return None
def extract_endpoints(js_content, base_url):
patterns = [
r"https?://[^\s\"'<>()]+",
r"(?<=['\"])/[^\s\"'<>()]*"
]
raw_endpoints = set()
for pattern in patterns:
raw_endpoints.update(re.findall(pattern, js_content))
endpoints = set()
for endpoint in raw_endpoints:
if endpoint.startswith('/'):
endpoints.add(urljoin(base_url, endpoint))
else:
endpoints.add(endpoint)
return endpoints
def process_input(input_arg, download=False, find_endpoints=False):
if os.path.isfile(input_arg):
with open(input_arg, 'r') as file:
for line in file:
url = line.strip()
if url:
fetch_js_files(url, download=download, find_endpoints=find_endpoints)
else:
fetch_js_files(input_arg, download=download, find_endpoints=find_endpoints)
def main():
parser = argparse.ArgumentParser(description='Fetch JavaScript files from a URL or list of URLs in a text file.')
parser.add_argument('input', type=str, help='A URL or a text file containing URLs')
parser.add_argument('--download', action='store_true', help='Download JavaScript files')
parser.add_argument('--find-endpoints', action='store_true', help='Find and save endpoints')
args = parser.parse_args()
process_input(args.input, download=args.download, find_endpoints=args.find_endpoints)
if __name__ == "__main__":
main()