-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Expand file tree
/
Copy pathUrlTools.cpp
More file actions
224 lines (189 loc) · 6.93 KB
/
UrlTools.cpp
File metadata and controls
224 lines (189 loc) · 6.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*
* Copyright (C) 2026 KeePassXC Team <team@keepassxc.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 or (at your option)
* version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "UrlTools.h"
#if defined(WITH_XC_NETWORKING) || defined(WITH_XC_BROWSER)
#include <QHostAddress>
#include <QNetworkCookie>
#include <QNetworkCookieJar>
#endif
#include <QRegularExpression>
#include <QUrl>
// Placeholder token that replaces the "*" wildcard character before a URL string
// is compared or parsed (see isUrlIdentical() and isUrlValid()).
const QString UrlTools::URL_WILDCARD = "1kpxcwc1";
// Global static UrlTools object that is handed out by UrlTools::instance().
Q_GLOBAL_STATIC(UrlTools, s_urlTools)
/**
 * Returns a pointer to the shared UrlTools object declared with Q_GLOBAL_STATIC.
 */
UrlTools* UrlTools::instance()
{
    return s_urlTools();
}
/**
 * Converts a QVariant to a QUrl.
 *
 * Returns the converted URL, or a default-constructed QUrl if the variant
 * cannot be converted to a QUrl.
 */
QUrl UrlTools::convertVariantToUrl(const QVariant& var) const
{
    if (!var.canConvert<QUrl>()) {
        return QUrl();
    }
    return var.toUrl();
}
#if defined(WITH_XC_NETWORKING) || defined(WITH_XC_BROWSER)
/**
 * Reads the redirection target of a network reply.
 *
 * Returns the reply's QNetworkRequest::RedirectionTargetAttribute as a QUrl.
 * The result is a default-constructed QUrl if the attribute cannot be converted.
 */
QUrl UrlTools::getRedirectTarget(QNetworkReply* reply) const
{
    return convertVariantToUrl(reply->attribute(QNetworkRequest::RedirectionTargetAttribute));
}
/**
 * Gets the base domain of URL or hostname.
 *
 * If returnOnlyLabel is true, return only the Registrable Origin Label:
 * https://www.w3.org/TR/webauthn-3/#registrable-origin-label
 *
 * Returns the base domain, e.g. https://another.example.co.uk -> example.co.uk
 * IP addresses, and hosts that consist of nothing more than the top level domain,
 * are returned unchanged.
 * Up-to-date list can be found: https://publicsuffix.org/list/public_suffix_list.dat
 */
QString UrlTools::getBaseDomainFromUrl(const QString& url, bool returnOnlyLabel) const
{
    const auto parsedUrl = QUrl::fromUserInput(url);
    auto hostName = parsedUrl.host();

    // An IP address has no domain hierarchy to reduce
    if (isIpAddress(hostName)) {
        return hostName;
    }

    const auto suffix = getTopLevelDomainFromUrl(parsedUrl.toString());
    // Length of the top level domain including the dot that precedes it
    const auto suffixWithDot = suffix.length() + 1;
    if (suffix.isEmpty() || suffixWithDot >= hostName.length()) {
        return hostName;
    }

    // Drop the top level domain from the hostname, e.g. another.example.co.uk -> another.example
    hostName.chop(suffixWithDot);

    // The last remaining label is the one we want, e.g. another.example -> example
    const QString label = hostName.split('.').last();
    if (returnOnlyLabel) {
        return label;
    }

    // Put the top level domain back, e.g. example -> example.co.uk
    return label + QString(".%1").arg(suffix);
}
/**
 * Gets the top level domain from URL.
 *
 * Returns the TLD e.g. https://another.example.co.uk -> co.uk
 *
 * If the host is an IP address it is returned unchanged. If no candidate is
 * rejected by the cookie jar, the last candidate that was tried is returned.
 */
QString UrlTools::getTopLevelDomainFromUrl(const QString& url) const
{
    // Parse the input once; the same QUrl provides the host and is reused for every cookie check
    const auto qUrl = QUrl::fromUserInput(url);
    auto host = qUrl.host();
    if (isIpAddress(host)) {
        return host;
    }

    const auto numberOfDomainParts = host.split('.').length();
    static const auto dummy = QByteArrayLiteral("");

    // Only loop the amount of different parts found
    for (auto i = 0; i < numberOfDomainParts; ++i) {
        // Cut the first part from host. When no dot is left, indexOf() returns -1
        // and the host stays as it is.
        host = host.mid(host.indexOf('.') + 1);

        QNetworkCookie cookie(dummy, dummy);
        cookie.setDomain(host);

        // A fresh cookie jar refuses the dummy cookie when its domain matches the
        // public suffix list, which means the current candidate is the TLD
        if (!QNetworkCookieJar{}.setCookiesFromUrl(QList{cookie}, qUrl)) {
            return host;
        }
    }

    return host;
}
bool UrlTools::isIpAddress(const QString& host) const
{
// Handle IPv6 host with brackets, e.g [::1]
const auto hostAddress = host.startsWith('[') && host.endsWith(']') ? host.mid(1, host.length() - 2) : host;
QHostAddress address(hostAddress);
return address.protocol() == QAbstractSocket::IPv4Protocol || address.protocol() == QAbstractSocket::IPv6Protocol;
}
#endif
// Returns true if URLs are identical. Paths with "/" are removed during comparison.
// URLs without scheme reverts to https.
// Special handling is needed because QUrl::matches() with QUrl::StripTrailingSlash does not strip "/" paths.
bool UrlTools::isUrlIdentical(const QString& first, const QString& second) const
{
auto trimUrl = [](QString url) {
url = url.trimmed();
if (url.endsWith("/")) {
url.remove(url.length() - 1, 1);
}
return url;
};
if (first.isEmpty() || second.isEmpty()) {
return false;
}
// Replace URL wildcards for comparison if found
const auto firstUrl = trimUrl(QString(first).replace("*", UrlTools::URL_WILDCARD));
const auto secondUrl = trimUrl(QString(second).replace("*", UrlTools::URL_WILDCARD));
if (firstUrl == secondUrl) {
return true;
}
return QUrl(firstUrl).matches(QUrl(secondUrl), QUrl::StripTrailingSlash);
}
bool UrlTools::isUrlValid(const QString& urlField, bool looseComparison) const
{
if (urlField.isEmpty() || urlField.startsWith("cmd://", Qt::CaseInsensitive)
|| urlField.startsWith("kdbx://", Qt::CaseInsensitive) || urlField.startsWith("{REF:A", Qt::CaseInsensitive)) {
return true;
}
auto url = urlField;
// Loose comparison that allows wildcards and exact URL inside " characters
if (looseComparison) {
// Exact URL
if (url.startsWith("\"") && url.endsWith("\"")) {
// Do not allow exact URL with wildcards, or empty exact URL
if (url.contains("*") || url.length() == 2) {
return false;
}
// Get the URL inside ""
url.remove(0, 1);
url.remove(url.length() - 1, 1);
} else {
// Do not allow URL with just wildcards, or double wildcards
if (url.length() == url.count("*") || url.contains("**") || url.contains("*.*")) {
return false;
}
url.replace("*", UrlTools::URL_WILDCARD);
}
}
QUrl qUrl;
if (urlField.contains("://")) {
qUrl = url;
} else {
qUrl = QUrl::fromUserInput(url);
}
if (qUrl.scheme() != "file" && qUrl.host().isEmpty()) {
return false;
}
#if defined(WITH_XC_NETWORKING) || defined(WITH_XC_BROWSER)
// Prevent TLD wildcards
if (looseComparison && url.contains(UrlTools::URL_WILDCARD)) {
const auto tld = getTopLevelDomainFromUrl(url);
if (tld.contains(UrlTools::URL_WILDCARD) || qUrl.host() == QString("%1.%2").arg(UrlTools::URL_WILDCARD, tld)) {
return false;
}
}
#endif
// Check for illegal characters. Adds also the wildcard * to the list
QRegularExpression re("[<>\\^`{|}\\*]");
auto match = re.match(url);
if (match.hasMatch()) {
return false;
}
return true;
}
bool UrlTools::domainHasIllegalCharacters(const QString& domain) const
{
QRegularExpression re(R"([\s\^#|/:<>\?@\[\]\\])");
return re.match(domain).hasMatch();
}