Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ BITBUCKET_TOKEN=replace-with-your-bitbucket-token
# BITBUCKET_API_BASE_URL=https://api.bitbucket.org/2.0
# BITBUCKET_TIMEOUT_SECONDS=30

# # # GitLab provider authentication
# GITLAB_PAT=glpat-xxxxxxxxxxxxxxxxxxxx

# # # Optional GitLab provider runtime settings
# GITLAB_API_BASE_URL=https://gitlab.com
# GITLAB_TIMEOUT_SECONDS=30

# # # Rule-specific examples (not part of core CLI contract)
# LANGUAGE_DETECTION_EXTENSIONS=.ts,.js,.java,.py
# LANGUAGE_REPORT_CSV=/tmp/gitoteko-workspace/languages.csv
Expand Down
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ Rules are implemented with a hexagonal architecture boundary and a strategy patt
- SSH keys already set up for repository cloning
- No external Python dependencies (uses standard library only)

## Supported providers

| Provider | Auth mechanism | Workspace concept |
|------------|---------------------------------------------|-------------------|
| Bitbucket | `BITBUCKET_TOKEN` or username/app-password | Workspace |
| GitLab | `GITLAB_PAT` (Personal Access Token) | Group |
| GitHub | (not yet implemented) | Organization |

## Core behavior

The scanner:
Expand Down Expand Up @@ -56,7 +64,8 @@ pip install -e .
cp .env.example .env
# Edit .env with your credentials:
# - BITBUCKET_TOKEN or BITBUCKET_USERNAME/BITBUCKET_APP_PASSWORD
# - GIT_WORKSPACE (your organization/workspace name)
# - GITLAB_PAT (Personal Access Token with read_api scope)
# - GIT_WORKSPACE (your organization/workspace/group name)
# - BASE_DIR (where repositories will be cloned)
# - SONARQUBE_URL and SONARQUBE_TOKEN (if using Sonar scans)
```
Expand All @@ -70,6 +79,17 @@ set -a && source .env && set +a
PYTHONPATH=src .venv/bin/python -m git_workspace_tool --dry-run
```

### GitLab quick start

```bash
export GIT_PROVIDER=gitlab
export GIT_WORKSPACE=my-gitlab-group
export GITLAB_PAT=glpat-xxxxxxxxxxxxxxxxxxxx
export BASE_DIR=/tmp/gitoteko-workspace

PYTHONPATH=src .venv/bin/python -m git_workspace_tool --dry-run
```

## Real run examples

**Test run with first 3 repositories** (useful when analyzing a whole workspace with many repos):
Expand Down
128 changes: 128 additions & 0 deletions src/git_workspace_tool/adapters/git_providers/gitlab_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""GitLab Cloud provider adapter.

Implements GitProviderPort for GitLab.com (or self-hosted) using the REST API v4.
Lists all immediate projects within a group and extracts SSH clone URLs.
"""

# The docstring must be the first statement to become the module's __doc__;
# a __future__ import is still valid when it directly follows the docstring.
from __future__ import annotations

import json
from typing import Any, Callable
from urllib.error import HTTPError, URLError
from urllib.parse import quote
from urllib.request import Request, urlopen

from git_workspace_tool.domain.entities import Repository, WorkspaceId
from git_workspace_tool.domain.ports import GitProviderPort


class GitLabCloudGitProviderAdapter(GitProviderPort):
    """Adapter that discovers repositories from a GitLab group via REST API v4.

    Why: enables the workspace scanner to operate on GitLab groups the same way
    it operates on Bitbucket workspaces, keeping the core use case provider-agnostic.
    """

    # Path prefix of the GitLab REST API, appended to the instance root URL.
    _API_PATH = "/api/v4"
    # Page size requested from the API; also used to detect the last page.
    _PAGE_SIZE = 100

    def __init__(
        self,
        *,
        api_base_url: str = "https://gitlab.com",
        token: str | None = None,
        timeout_seconds: float = 30.0,
        urlopen_fn: Callable[..., Any] = urlopen,
    ) -> None:
        """Store connection settings; no network access happens here.

        Args:
            api_base_url: Root URL of the GitLab instance. Trailing slashes and
                an already-present trailing ``/api/v4`` are tolerated.
            token: Value sent in the ``PRIVATE-TOKEN`` header. When empty or
                ``None`` no authentication header is sent.
            timeout_seconds: Timeout passed to ``urlopen_fn`` for each request.
            urlopen_fn: Callable used to perform the HTTP request; replaceable
                so the adapter can be exercised without real network calls.
        """
        normalized_url = api_base_url.rstrip("/")
        # Accept "https://host/api/v4" as well as "https://host": the API path
        # is appended when building request URLs and must not be doubled.
        if normalized_url.endswith(self._API_PATH):
            normalized_url = normalized_url[: -len(self._API_PATH)].rstrip("/")

        self._api_base_url = normalized_url
        self._token = token
        self._timeout_seconds = timeout_seconds
        self._urlopen_fn = urlopen_fn

    def list_repositories(self, workspace: WorkspaceId) -> list[Repository]:
        """List all immediate projects in the given GitLab group.

        Uses paginated GET /api/v4/groups/:id/projects and keeps requesting
        pages until one comes back with fewer entries than the page size.

        Args:
            workspace: Group identifier; it is percent-encoded (including any
                ``/``) before being placed in the URL path.

        Returns:
            Repositories for every project that could be mapped. Entries that
            are not JSON objects, or that lack a usable path or SSH URL, are
            skipped.

        Raises:
            RuntimeError: If a request fails or the response is not a JSON array.
        """
        encoded_group = quote(workspace, safe="")
        base_url = f"{self._api_base_url}{self._API_PATH}/groups/{encoded_group}/projects"

        repositories: list[Repository] = []
        page = 1

        while True:
            url = (
                f"{base_url}?per_page={self._PAGE_SIZE}"
                f"&page={page}&include_subgroups=false"
            )
            items = self._request_json_list(url)

            for item in items:
                if not isinstance(item, dict):
                    continue
                repository = self._map_repository(item)
                if repository is not None:
                    repositories.append(repository)

            # A short (or empty) page means there is nothing left to fetch.
            if len(items) < self._PAGE_SIZE:
                break

            page += 1

        return repositories

    def get_clone_url(self, repository: Repository) -> str:
        """Return the SSH clone URL stored in the repository entity."""
        return repository.clone_url

    def _request_json_list(self, url: str) -> list[Any]:
        """Execute a GET request and parse the response as a JSON array.

        Args:
            url: Fully built request URL.

        Returns:
            The decoded top-level JSON array.

        Raises:
            RuntimeError: On HTTP errors, connection or timeout failures,
                undecodable or invalid JSON, or a non-array top-level payload.
        """
        request = Request(url, headers=self._build_headers())
        try:
            with self._urlopen_fn(request, timeout=self._timeout_seconds) as response:
                content = response.read()
        except HTTPError as error:
            # HTTPError subclasses URLError, so it has to be handled first.
            raise RuntimeError(
                f"GitLab API request failed with HTTP {error.code} for URL: {url}"
            ) from error
        except URLError as error:
            raise RuntimeError(
                f"GitLab API request failed for URL: {url}: {error.reason}"
            ) from error
        except OSError as error:
            # Socket timeouts and connection resets raised while reading the
            # body are plain OSError subclasses, not URLError.
            raise RuntimeError(
                f"GitLab API request failed for URL: {url}: {error}"
            ) from error

        try:
            parsed = json.loads(content)
        except ValueError as error:
            # ValueError covers json.JSONDecodeError and also the
            # UnicodeDecodeError raised for bytes that are not valid UTF-8/16/32.
            raise RuntimeError(
                f"Invalid JSON received from GitLab API for URL: {url}"
            ) from error

        if not isinstance(parsed, list):
            raise RuntimeError(
                "Unexpected GitLab API payload: top-level object must be a JSON array"
            )

        return parsed

    def _build_headers(self) -> dict[str, str]:
        """Build request headers, adding token authentication when a token is set."""
        headers: dict[str, str] = {"Accept": "application/json"}

        if self._token:
            headers["PRIVATE-TOKEN"] = self._token

        return headers

    def _map_repository(self, payload: dict[str, Any]) -> Repository | None:
        """Map a GitLab project JSON object to a domain Repository entity.

        Args:
            payload: One project object from the API response.

        Returns:
            A Repository whose slug is the project's ``path``, whose name is the
            project's ``name`` (falling back to the slug when missing or blank)
            and whose clone URL is ``ssh_url_to_repo``; ``None`` when ``path``
            or ``ssh_url_to_repo`` is missing, blank, or not a string.
        """
        path = payload.get("path")
        if not isinstance(path, str) or not path.strip():
            return None

        ssh_url = payload.get("ssh_url_to_repo")
        if not isinstance(ssh_url, str) or not ssh_url.strip():
            return None

        repo_slug = path.strip()
        name = payload.get("name")
        repo_name = name.strip() if isinstance(name, str) and name.strip() else repo_slug

        return Repository(name=repo_name, slug=repo_slug, clone_url=ssh_url.strip())
15 changes: 15 additions & 0 deletions src/git_workspace_tool/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ class AppConfig:
bitbucket_app_password: str | None
bitbucket_api_base_url: str
bitbucket_timeout_seconds: float
gitlab_token: str | None
gitlab_api_base_url: str
gitlab_timeout_seconds: float


def load_config(args, env: Mapping[str, str]) -> AppConfig:
Expand Down Expand Up @@ -83,6 +86,15 @@ def load_config(args, env: Mapping[str, str]) -> AppConfig:
except ValueError as error:
raise ValueError("BITBUCKET_TIMEOUT_SECONDS must be a number") from error

gitlab_token = _normalize_empty(env.get("GITLAB_PAT"))
gitlab_api_base_url = _normalize_empty(env.get("GITLAB_API_BASE_URL")) or "https://gitlab.com"

raw_gitlab_timeout = _normalize_empty(env.get("GITLAB_TIMEOUT_SECONDS"))
try:
gitlab_timeout_seconds = float(raw_gitlab_timeout) if raw_gitlab_timeout else 30.0
except ValueError as error:
raise ValueError("GITLAB_TIMEOUT_SECONDS must be a number") from error

return AppConfig(
provider=provider,
workspace=workspace,
Expand All @@ -97,6 +109,9 @@ def load_config(args, env: Mapping[str, str]) -> AppConfig:
bitbucket_app_password=bitbucket_app_password,
bitbucket_api_base_url=bitbucket_api_base_url,
bitbucket_timeout_seconds=bitbucket_timeout_seconds,
gitlab_token=gitlab_token,
gitlab_api_base_url=gitlab_api_base_url,
gitlab_timeout_seconds=gitlab_timeout_seconds,
)


Expand Down
27 changes: 18 additions & 9 deletions src/git_workspace_tool/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from git_workspace_tool.adapters.filesystem.local_filesystem import LocalFileSystemAdapter
from git_workspace_tool.adapters.git_client.shell_git_client import ShellGitClientAdapter
from git_workspace_tool.adapters.git_providers.bitbucket_cloud import BitbucketCloudGitProviderAdapter
from git_workspace_tool.adapters.git_providers.gitlab_cloud import GitLabCloudGitProviderAdapter
from git_workspace_tool.application.use_cases.git_workspace_scanner import (
GitWorkspaceScanner,
ScanExecutionSummary,
Expand Down Expand Up @@ -115,17 +116,25 @@ def main() -> int:


def _build_scanner(provider: str, config) -> GitWorkspaceScanner:
if provider != "bitbucket":
if provider == "bitbucket":
provider_adapter = BitbucketCloudGitProviderAdapter(
api_base_url=config.bitbucket_api_base_url,
token=config.bitbucket_token,
username=config.bitbucket_username,
app_password=config.bitbucket_app_password,
timeout_seconds=config.bitbucket_timeout_seconds,
)
elif provider == "gitlab":
if not config.gitlab_token:
raise RuntimeError("GITLAB_PAT is required when using the gitlab provider")
provider_adapter = GitLabCloudGitProviderAdapter(
api_base_url=config.gitlab_api_base_url,
token=config.gitlab_token,
timeout_seconds=config.gitlab_timeout_seconds,
)
else:
raise RuntimeError(f"Provider '{provider}' is not implemented yet")

provider_adapter = BitbucketCloudGitProviderAdapter(
api_base_url=config.bitbucket_api_base_url,
token=config.bitbucket_token,
username=config.bitbucket_username,
app_password=config.bitbucket_app_password,
timeout_seconds=config.bitbucket_timeout_seconds,
)

return GitWorkspaceScanner(
git_provider=provider_adapter,
git_client=ShellGitClientAdapter(),
Expand Down