From d855b039e5dd425110a9896bbc4954080f43c5c0 Mon Sep 17 00:00:00 2001 From: Alonso Utreras Date: Tue, 23 Jun 2026 21:01:48 -0400 Subject: [PATCH] feat: add gitlab to possible providers --- .env.example | 7 + README.md | 22 ++- .../adapters/git_providers/gitlab_cloud.py | 128 ++++++++++++++++++ src/git_workspace_tool/cli/config.py | 15 ++ src/git_workspace_tool/cli/main.py | 27 ++-- 5 files changed, 189 insertions(+), 10 deletions(-) create mode 100644 src/git_workspace_tool/adapters/git_providers/gitlab_cloud.py diff --git a/.env.example b/.env.example index f11bece..d6a846b 100644 --- a/.env.example +++ b/.env.example @@ -35,6 +35,13 @@ BITBUCKET_TOKEN=replace-with-your-bitbucket-token # BITBUCKET_API_BASE_URL=https://api.bitbucket.org/2.0 # BITBUCKET_TIMEOU_SECONDS=30 +# # # GitLab provider authentication +# GITLAB_PAT=glpat-xxxxxxxxxxxxxxxxxxxx + +# # # Optional GitLab provider runtime settings +# GITLAB_API_BASE_URL=https://gitlab.com +# GITLAB_TIMEOUT_SECONDS=30 + # # # Rule-specific examples (not part of core CLI contract) # LANGUAGE_DETECTION_EXTENSIONS=.ts,.js,.java,.py # LANGUAGE_REPORT_CSV=/tmp/gitoteko-workspace/languages.csv diff --git a/README.md b/README.md index afc8e0c..ccf5f97 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,14 @@ Rules are implemented with a hexagonal architecture boundary and a strategy patt - SSH keys already set up for repository cloning - No external Python dependencies (uses standard library only) +## Supported providers + +| Provider | Auth mechanism | Workspace concept | +|------------|---------------------------------------------|-------------------| +| Bitbucket | `BITBUCKET_TOKEN` or username/app-password | Workspace | +| GitLab | `GITLAB_PAT` (Personal Access Token) | Group | +| GitHub | (not yet implemented) | Organization | + ## Core behavior The scanner: @@ -56,7 +64,8 @@ pip install -e . 
class GitLabCloudGitProviderAdapter(GitProviderPort):
    """Adapter that discovers repositories from a GitLab group via REST API v4.

    Why: enables the workspace scanner to operate on GitLab groups the same way
    it operates on Bitbucket workspaces, keeping the core use case provider-agnostic.

    Only the group's immediate projects are listed (``include_subgroups=false``).
    Projects without a usable ``path`` or ``ssh_url_to_repo`` are skipped.
    """

    # Page size requested from GitLab; also used to detect the last page.
    _PER_PAGE: int = 100
    # REST API prefix appended to the instance root when building request URLs.
    _API_PREFIX: str = "/api/v4"

    def __init__(
        self,
        *,
        api_base_url: str = "https://gitlab.com",
        token: str | None = None,
        timeout_seconds: float = 30.0,
        urlopen_fn: Callable[..., Any] = urlopen,
    ) -> None:
        """Configure the adapter.

        Args:
            api_base_url: Root URL of the GitLab instance. A trailing slash and
                a trailing ``/api/v4`` are tolerated and removed.
            token: Personal Access Token sent as the ``PRIVATE-TOKEN`` header.
                When empty or ``None`` the requests are sent unauthenticated.
            timeout_seconds: Timeout forwarded to ``urlopen_fn`` for each request.
            urlopen_fn: Callable with the ``urllib.request.urlopen`` calling
                convention; injectable so tests can avoid real network access.
        """
        normalized_base = api_base_url.rstrip("/")
        # The request path already adds /api/v4, so a base URL that includes it
        # would otherwise yield ".../api/v4/api/v4/groups/...".
        if normalized_base.lower().endswith(self._API_PREFIX):
            normalized_base = normalized_base[: -len(self._API_PREFIX)].rstrip("/")

        self._api_base_url = normalized_base
        self._token = token
        self._timeout_seconds = timeout_seconds
        self._urlopen_fn = urlopen_fn

    def list_repositories(self, workspace: WorkspaceId) -> list[Repository]:
        """List all immediate projects in the given GitLab group.

        Uses paginated GET /api/v4/groups/:id/projects with per_page=100 and
        stops on the first empty or short page.

        Raises:
            RuntimeError: if any page request fails or returns an unexpected payload.
        """
        # safe="" so that "/" in a nested group path is percent-encoded.
        encoded_group = quote(workspace, safe="")
        base_url = f"{self._api_base_url}{self._API_PREFIX}/groups/{encoded_group}/projects"

        repositories: list[Repository] = []
        page = 1

        while True:
            url = (
                f"{base_url}?per_page={self._PER_PAGE}"
                f"&page={page}&include_subgroups=false"
            )
            items = self._request_json_list(url)

            if not items:
                break

            for item in items:
                if not isinstance(item, dict):
                    continue
                repository = self._map_repository(item)
                if repository is not None:
                    repositories.append(repository)

            # A page shorter than the requested size is the last one.
            if len(items) < self._PER_PAGE:
                break

            page += 1

        return repositories

    def get_clone_url(self, repository: Repository) -> str:
        """Return the SSH clone URL stored in the repository entity."""
        return repository.clone_url

    def _request_json_list(self, url: str) -> list[Any]:
        """Execute a GET request and parse the response as a JSON array.

        Raises:
            RuntimeError: on an HTTP error status, a transport failure
                (including timeouts), a body that is not valid JSON, or a
                top-level JSON value that is not an array.
        """
        request = Request(url, headers=self._build_headers())
        try:
            with self._urlopen_fn(request, timeout=self._timeout_seconds) as response:
                content = response.read()
        except HTTPError as error:
            # HTTPError is a URLError subclass, so it must be handled first.
            raise RuntimeError(
                f"GitLab API request failed with HTTP {error.code} for URL: {url}"
            ) from error
        except URLError as error:
            raise RuntimeError(
                f"GitLab API request failed for URL: {url}: {error.reason}"
            ) from error
        except OSError as error:
            # Read timeouts and connection resets while reading the body are
            # OSError subclasses that urlopen does not wrap in URLError.
            raise RuntimeError(
                f"GitLab API request failed for URL: {url}: {error}"
            ) from error

        try:
            parsed = json.loads(content)
        except ValueError as error:
            # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
            # raised when the body bytes are not valid UTF-8/16/32.
            raise RuntimeError(
                f"Invalid JSON received from GitLab API for URL: {url}"
            ) from error

        if not isinstance(parsed, list):
            raise RuntimeError(
                "Unexpected GitLab API payload: top-level object must be a JSON array"
            )

        return parsed

    def _build_headers(self) -> dict[str, str]:
        """Build request headers, adding token authentication when a token is set."""
        headers: dict[str, str] = {"Accept": "application/json"}

        if self._token:
            headers["PRIVATE-TOKEN"] = self._token

        return headers

    def _map_repository(self, payload: dict[str, Any]) -> Repository | None:
        """Map a GitLab project JSON object to a domain Repository entity.

        Returns ``None`` when ``path`` or ``ssh_url_to_repo`` is missing, not a
        string, or blank. The display name falls back to the path when ``name``
        is missing or blank.
        """
        path = payload.get("path")
        name = payload.get("name")
        ssh_url = payload.get("ssh_url_to_repo")

        if not isinstance(path, str) or not path.strip():
            return None

        if not isinstance(ssh_url, str) or not ssh_url.strip():
            return None

        repo_slug = path.strip()
        repo_name = name.strip() if isinstance(name, str) and name.strip() else repo_slug

        return Repository(name=repo_name, slug=repo_slug, clone_url=ssh_url.strip())
_normalize_empty(env.get("GITLAB_TIMEOUT_SECONDS")) + try: + gitlab_timeout_seconds = float(raw_gitlab_timeout) if raw_gitlab_timeout else 30.0 + except ValueError as error: + raise ValueError("GITLAB_TIMEOUT_SECONDS must be a number") from error + return AppConfig( provider=provider, workspace=workspace, @@ -97,6 +109,9 @@ def load_config(args, env: Mapping[str, str]) -> AppConfig: bitbucket_app_password=bitbucket_app_password, bitbucket_api_base_url=bitbucket_api_base_url, bitbucket_timeout_seconds=bitbucket_timeout_seconds, + gitlab_token=gitlab_token, + gitlab_api_base_url=gitlab_api_base_url, + gitlab_timeout_seconds=gitlab_timeout_seconds, ) diff --git a/src/git_workspace_tool/cli/main.py b/src/git_workspace_tool/cli/main.py index 93b7366..d571d29 100644 --- a/src/git_workspace_tool/cli/main.py +++ b/src/git_workspace_tool/cli/main.py @@ -9,6 +9,7 @@ from git_workspace_tool.adapters.filesystem.local_filesystem import LocalFileSystemAdapter from git_workspace_tool.adapters.git_client.shell_git_client import ShellGitClientAdapter from git_workspace_tool.adapters.git_providers.bitbucket_cloud import BitbucketCloudGitProviderAdapter +from git_workspace_tool.adapters.git_providers.gitlab_cloud import GitLabCloudGitProviderAdapter from git_workspace_tool.application.use_cases.git_workspace_scanner import ( GitWorkspaceScanner, ScanExecutionSummary, @@ -115,17 +116,25 @@ def main() -> int: def _build_scanner(provider: str, config) -> GitWorkspaceScanner: - if provider != "bitbucket": + if provider == "bitbucket": + provider_adapter = BitbucketCloudGitProviderAdapter( + api_base_url=config.bitbucket_api_base_url, + token=config.bitbucket_token, + username=config.bitbucket_username, + app_password=config.bitbucket_app_password, + timeout_seconds=config.bitbucket_timeout_seconds, + ) + elif provider == "gitlab": + if not config.gitlab_token: + raise RuntimeError("GITLAB_PAT is required when using the gitlab provider") + provider_adapter = GitLabCloudGitProviderAdapter( 
+ api_base_url=config.gitlab_api_base_url, + token=config.gitlab_token, + timeout_seconds=config.gitlab_timeout_seconds, + ) + else: raise RuntimeError(f"Provider '{provider}' is not implemented yet") - provider_adapter = BitbucketCloudGitProviderAdapter( - api_base_url=config.bitbucket_api_base_url, - token=config.bitbucket_token, - username=config.bitbucket_username, - app_password=config.bitbucket_app_password, - timeout_seconds=config.bitbucket_timeout_seconds, - ) - return GitWorkspaceScanner( git_provider=provider_adapter, git_client=ShellGitClientAdapter(),