Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions contribution_analysis.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
GitHub 贡献分析模块
通过 GitHub API 统计仓库中各贡献者的提交数量,并导出为 CSV 文件。
"""
import subprocess
import argparse
import csv
Expand All @@ -10,7 +14,7 @@


def get_login_by_sha(sha: str, repo: str, token: str,
cache: dict[str, Optional[str]]) -> Optional[str]:
cache: dict[str, Optional[str]]) -> Optional[str]:
"""Get GitHub login ID by commit SHA with caching.

Args:
Expand All @@ -24,7 +28,6 @@ def get_login_by_sha(sha: str, repo: str, token: str,
"""
if sha in cache:
return cache[sha]

url = f"https://api.github.com/repos/{repo}/commits/{sha}"
headers = {"Authorization": f"token {token}"}
try:
Expand All @@ -37,7 +40,7 @@ def get_login_by_sha(sha: str, repo: str, token: str,
cache[sha] = login
return login
except requests.RequestException as e:
print(f"SHA查询异常({sha}): {e}")
print(f"SHA query failed ({sha}): {e}")
return None


Expand All @@ -56,7 +59,7 @@ def load_ignore_users(file_path: str) -> set[str]:
with open(file_path, 'r', encoding='utf-8') as f:
return {str(u).strip().lower() for u in json.load(f)}
except (json.JSONDecodeError, IOError) as e:
print(f"加载屏蔽名单失败: {e}")
print(f"Failed to load ignore list: {e}")
return set()


Expand All @@ -82,15 +85,14 @@ def run_analysis() -> None:

result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.returncode != 0:
print("获取Git日志失败")
print("Failed to retrieve Git log")
return

shas = [s.strip() for s in result.stdout.split('\n') if s.strip()]
login_counts: Counter[str] = Counter()
sha_to_login_cache: dict[str, Optional[str]] = {}

print(f"检测到 {len(shas)} 个提交,正在追溯归属...")

print(f"Detected {len(shas)} commits, resolving authors...")
for sha in shas:
login = get_login_by_sha(sha, args.repo, args.token, sha_to_login_cache)
if login and login.lower() not in ignore_set:
Expand All @@ -102,8 +104,9 @@ def run_analysis() -> None:
writer = csv.writer(f)
writer.writerow(["GitHub_Login", "Commits"])
writer.writerows(sorted_stats)
print(f"分析完成,导出至 {args.output}")

print(f"Analysis complete, exported to {args.output}")


if __name__ == "__main__":
run_analysis()
run_analysis()
31 changes: 31 additions & 0 deletions docs/gaussian_mixture.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,34 @@
- 异常检测
- 图像分割
- 语音识别

## 数学公式

GMM 的概率密度函数为:

`p(x) = Σ_{k=1}^{K} π_k * N(x | μ_k, Σ_k)`

其中:
- K 为高斯成分数量
- π_k 为第 k 个成分的混合权重,满足 π_k ≥ 0 且 Σ_{k=1}^{K} π_k = 1
- N(x | μ_k, Σ_k) 为第 k 个高斯分布

## 代码示例

使用 scikit-learn 拟合高斯混合模型:

```python
from sklearn.mixture import GaussianMixture
import numpy as np

# 生成示例数据
X = np.random.randn(300, 2)

# 创建并训练模型
gmm = GaussianMixture(n_components=3, random_state=0)
gmm.fit(X)

# 预测类别
labels = gmm.predict(X)
print("各成分权重:", gmm.weights_)
```
11 changes: 7 additions & 4 deletions ignore_users.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
[
"Haidong Wang",
"donghaiwang",
"whd@hutb.edu.cn"
]
{
"name": "Haidong Wang",
"github": "donghaiwang",
"email": "whd@hutb.edu.cn",
"role": "author"
}
]