-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
132 lines (102 loc) · 4 KB
/
utils.py
File metadata and controls
132 lines (102 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
from icecream import ic as logger
from datetime import datetime
import re
from pydriller import Repository
# Create the log directory if needed; exist_ok avoids a check-then-create race
# when several processes start at the same time.
os.makedirs('./logs', exist_ok=True)

# Define a file to log IceCream output
# NOTE(review): the timestamp contains ':' characters, which are not valid in
# Windows file names -- confirm the target platforms before relying on this.
log_file_path = os.path.join('./logs', f'{datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}.log')


def _append_log_line(text):
    """Append one line of IceCream output to the log file and close the handle."""
    with open(log_file_path, 'a') as log_file:
        log_file.write(text + '\n')


# Replace logging configuration with IceCream configuration
logger.configureOutput(prefix=' - ', outputFunction=_append_log_line)
# Compiled once at import time; re.match anchors it at the start of the URL.
_GITHUB_COMMIT_URL_RE = re.compile(r'https?://github\.com/.+?/commit/[a-f0-9]+')


def get_first_valid_github_commit_link(data_list):
    """Return the first reference URL that looks like a GitHub commit link.

    Args:
        data_list: Iterable of reference entries. Only dict entries with a
            string value under the 'url' key are considered; anything else
            (missing key, None, non-string URL, non-dict entry) is skipped
            instead of raising.

    Returns:
        The first matching URL string, or None when no entry matches (or
        when data_list is empty or None).
    """
    for entry in data_list or ():
        if not isinstance(entry, dict):
            continue
        url = entry.get('url')
        # Guard against None / non-string URLs, which re.match would reject
        # with a TypeError.
        if isinstance(url, str) and _GITHUB_COMMIT_URL_RE.match(url):
            return url
    return None
def remove_redundance_info(data_list):
    """Reduce raw CVE records to the handful of fields used downstream.

    Args:
        data_list: Iterable of CVE record dicts. Each record must provide
            data['cveMetadata'] (with 'cveId' and 'datePublished') and
            data['containers']['cna'] (with 'descriptions' and 'references').

    Returns:
        A new list with one dict per record, holding the keys 'cve-id',
        'description', 'published-date', 'last-modified-date',
        'references' and 'vulnerability-type'.

    Raises:
        KeyError / IndexError: if a required field listed above is missing.
    """
    # NOTE: this switches IceCream output off for the whole process, so the
    # logger(...) calls below stay silent unless output is re-enabled elsewhere.
    logger.disable()

    new_data_list = []
    for data in data_list:
        metadata = data['cveMetadata']
        cna = data['containers']['cna']
        cve_id = metadata['cveId']

        new_data = {
            'cve-id': cve_id,
            # Only the first description entry is kept.
            'description': cna['descriptions'][0]['value'],
            'published-date': metadata['datePublished'],
        }

        # 'dateUpdated' is optional: fall back to an empty string.
        try:
            new_data['last-modified-date'] = metadata['dateUpdated']
        except KeyError as e:
            logger(f"{cve_id} - {e}")
            new_data['last-modified-date'] = ''

        new_data['references'] = cna['references']

        # The CWE information is optional as well; any missing or malformed
        # level of the nested structure yields the placeholder below.
        try:
            problem = cna['problemTypes'][0]['descriptions'][0]
            new_data['vulnerability-type'] = {
                'id': problem['cweID'],
                'description': problem['description']
            }
        except (KeyError, IndexError, TypeError) as e:
            logger(f"{cve_id} - {e}")
            new_data['vulnerability-type'] = {
                'id': 'NVD-CWE-noinfo',
                'description': 'unknown'
            }

        new_data_list.append(new_data)
    return new_data_list
# Lower-case file extension (without the dot) -> language name.
# Built once at import time instead of on every call.
_LANGUAGE_BY_EXTENSION = {
    'py': 'Python',
    'java': 'Java',
    'cpp': 'C++',
    'c': 'C',
    'html': 'HTML',
    'css': 'CSS',
    'js': 'JavaScript',
    'rb': 'Ruby',
    'php': 'PHP',
    'swift': 'Swift',
    # Add more extensions and corresponding languages as needed
}


def get_language_from_extension(file_name):
    """Map a file name to a programming language based on its extension.

    Args:
        file_name: File name whose text after the last '.' is used as the
            extension (compared case-insensitively).

    Returns:
        The language name for a known extension, otherwise 'Unknown'.
        Names without any '.' have no extension and are always 'Unknown',
        so an extensionless file called 'c' or 'py' is not misclassified.
    """
    _, dot, extension = file_name.rpartition('.')
    if not dot:
        return 'Unknown'
    return _LANGUAGE_BY_EXTENSION.get(extension.lower(), 'Unknown')
def process_code_change(commit):
    """Summarise the files modified by a commit.

    Args:
        commit: Iterable of modified-file objects exposing 'filename',
            'diff_parsed', 'source_code' and 'source_code_before'
            (get_commit_metadata passes commit.modified_files here).

    Returns:
        A list with one dict per file, holding 'file_name', 'language',
        'diff_parsed', 'before' (content prior to the commit) and 'after'
        (content once the commit is applied).
    """
    return [
        {
            'file_name': modified_file.filename,
            'language': get_language_from_extension(modified_file.filename),
            'diff_parsed': modified_file.diff_parsed,
            # PyDriller: source_code_before is the pre-change content and
            # source_code is the post-change content.
            'before': modified_file.source_code_before,
            'after': modified_file.source_code
        }
        for modified_file in commit
    ]
def get_commit_metadata(github_commit_link):
    """Fetch metadata and code changes for the commit behind a GitHub link.

    Args:
        github_commit_link: URL such as
            https://github.com/owner/repo/commit/hash. Extra path segments
            before 'commit' (e.g. pull/123/) and anything after the hash
            (e.g. '#diff-...', '.patch') are tolerated.

    Returns:
        A dict with 'commit_hash', 'author_name', 'author_email', 'date',
        'message', 'parents' and 'code_change' for the first commit yielded
        by the repository traversal, or None when the link is missing, is
        not a GitHub commit link, or the traversal yields no commit.
    """
    if not isinstance(github_commit_link, str):
        return None

    # Extract owner, repo, and hash from the GitHub commit link by name
    # rather than by position, so the hash is always the segment that
    # follows 'commit/'.
    link_match = re.match(
        r'https?://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/(?:.+?/)?commit/(?P<hash>[0-9a-fA-F]+)',
        github_commit_link,
    )
    if link_match is None:
        return None

    owner = link_match.group('owner')
    repo_name = link_match.group('repo')
    commit_hash = link_match.group('hash')

    # Repository object restricted to the single commit of interest
    repository = Repository(f'https://github.com/{owner}/{repo_name}', only_commits=[commit_hash])

    for commit in repository.traverse_commits():
        code_change = process_code_change(commit.modified_files)
        # Only the first commit is reported.
        return {
            'commit_hash': commit.hash,
            'author_name': commit.author.name,
            'author_email': commit.author.email,
            'date': commit.author_date,
            'message': commit.msg,
            'parents': commit.parents,
            'code_change': code_change,
        }
    return None
def extract_commit_code_change(data_list):
    """Attach commit information to every record, in place.

    Args:
        data_list: Iterable of record dicts, each with a 'references' list
            of reference entries.

    Returns:
        The same data_list object; every record gains a 'commit_info' key
        holding the result of get_commit_metadata, or None when the record
        has no GitHub commit reference.
    """
    for data in data_list:
        github_commit_link = get_first_valid_github_commit_link(data['references'])
        if github_commit_link is None:
            # No GitHub commit reference: there is nothing to fetch, so skip
            # the repository lookup and keep processing the other records.
            data['commit_info'] = None
            continue
        data['commit_info'] = get_commit_metadata(github_commit_link)
    return data_list