Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions .github/workflows/get_papi_github_statistics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
name: PAPI GitHub Statistics

# Manually triggered workflow that plots PAPI GitHub statistics. Every input
# below mirrors a command line option of the plotting script.
# NOTE: The script's --filenames option is not exposed as an input yet:
# parser.add_argument("--filenames", default = None, help = "Name of file(s) to save the plots under. If all is provided to --plots-to-generate you must provide TWO filenames and list them as comma separated.")

on:
  workflow_dispatch:
    inputs:
      starting_release:
        type: string
        description: "The release you want to begin plotting from. Default is 0 and corresponds to the first PAPI GitHub release."
        default: "0"
        required: false
      plot_to_generate:
        type: choice
        description: "Type of plot to generate. Default is all."
        default: "all"
        options:
          - all
          - bar
          - line
        required: false
      figsize:
        type: string
        description: "Size of the generated figure(s). Must be in the format width_size,height_size."
        default: "12,5"
        required: false
      colors:
        type: string
        description: "Colors to be used in the generated plot. You can pass a colormap or list of colors. In the case of a list of colors, they must be comma separated and the number provided must match the number of releases you wish to plot."
        default: "viridis"
        required: false
      fontsize:
        type: string
        description: "Fontsizes for the plots x and y ticks/labels and title."
        default: "14"
        required: false
      barplot_kwargs:
        type: string
        description: "Keyword arguments for the generated barplot"
        required: false
      lineplot_kwargs:
        type: string
        description: "Keyword arguments for the generated lineplot"
        required: false
      scatterplot_kwargs:
        type: string
        description: "Keyword arguments for the generated scatterplot"
        required: false

jobs:
  get_papi_github_statistics:
    runs-on: ubuntu-latest
    steps:
      - name: Setup Python Dependency
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"

      - name: Install Matplotlib
        run: |
          pip install matplotlib

      - name: Checkout
        uses: actions/checkout@v6

      # The workflow inputs are handed to the shell through environment
      # variables and quoted, so a value containing spaces or shell
      # metacharacters reaches the script as one literal argument.
      # The barplot_kwargs, lineplot_kwargs and scatterplot_kwargs inputs are
      # not forwarded to the script yet.
      - name: Plot PAPI GitHub Statistics
        run: >
          python3 .github/workflows_scripts/ga_papi_github_download_count_and_unique_clones.py
          --starting-release "$STARTING_RELEASE"
          --plot-to-generate "$PLOT_TO_GENERATE"
          --figsize "$FIGSIZE"
          --colors "$COLORS"
          --fontsize "$FONTSIZE"
        env:
          # Token used by the gh command that the script runs
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          STARTING_RELEASE: ${{ github.event.inputs.starting_release }}
          # The input is declared as plot_to_generate (singular)
          PLOT_TO_GENERATE: ${{ github.event.inputs.plot_to_generate }}
          FIGSIZE: ${{ github.event.inputs.figsize }}
          COLORS: ${{ github.event.inputs.colors }}
          FONTSIZE: ${{ github.event.inputs.fontsize }}


Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
#!/usr/bin/env python3

import matplotlib as mpl
import matplotlib.pyplot as plt
import json
import subprocess
import numpy as np
import argparse
import datetime


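'''
Notes:
The GitHub releases endpoint returns a JSON list with one entry per release.
Each entry is a dictionary whose "assets" key holds a list of dictionaries,
so the download count has to be read by indexing into that list.
'''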

#TODO: 1. Incorporate starting release argument, how can I properly offset the annotation? Can the annotation be used for the barplot as well?

def setup_args() -> argparse.ArgumentParser:
    """Setup the command line interface.

    --starting-release is converted to int so that a value given on the
    command line has the same type as the default, and --plot-to-generate
    only accepts the plot types listed in its help text. All other options
    are returned as given (strings when supplied on the command line).

    :returns: An instance of argparse.ArgumentParser.
    :rtype: argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--starting-release", type = int, default = 0, help = "The release you want to begin plotting from. Default is 0 and corresponds to the first PAPI GitHub release.")
    parser.add_argument("--plot-to-generate", default = "all", choices = ["all", "bar", "line"], help = "Type of plot to generate. Options include [all (default), bar, or line].")
    parser.add_argument("--figsize", default = "12,5", help = "Size of the generated figure. Must be in the format width_size,height_size.")
    parser.add_argument("--colors", default = "viridis", help = "Colors to be used in the generated plot. You can pass a colormap or list of colors. In the case of a list of colors, they must be comma separated and the number provided must match the number of releases you wish to plot.")
    parser.add_argument("--fontsize", default = 14, help = "Fontsizes for the plots x and y ticks/labels and title.")
    parser.add_argument("--filenames", default = None, help = "Name of file(s) to save the plots under. If all is provided to --plots-to-generate you must provide TWO filenames and list them as comma separated.")
    # The three kwargs options are expected to hold JSON objects
    parser.add_argument("--barplot-kwargs", default = None, help = "Keyword arguments for the generated barplot.")
    parser.add_argument("--lineplot-kwargs", default = None, help = "Keyword arguments for the generated lineplot.")
    parser.add_argument("--scatterplot-kwargs", default = None, help = "Keyword arguments for the generated scatterplot.")

    return parser

def get_papi_release_download_count() -> tuple[int, list[str], list[int]]:
    """For each PAPI release on GitHub get the download count.

    The releases are requested with the GitHub CLI (gh). The download count
    of a release is the download count of its first asset; a release that has
    no assets is reported with a count of 0. The returned lists are in the
    reverse of the order in which the API lists the releases.

    :returns: A tuple containing number of releases, name of releases, and the download count per release.
    :rtype: tuple
    :raises FileNotFoundError: If the gh executable cannot be found.
    :raises subprocess.CalledProcessError: If gh exits with a non-zero status.
    """
    repo_owner = "icl-utk-edu"
    repo = "papi"

    # The command is passed as an argument list so no shell is involved.
    # per_page=100 asks for the largest page the endpoint serves, instead of
    # the default page size.
    gh_command = [
        "gh", "api",
        "-H", "Accept: application/vnd.github+json",
        "-H", "X-GitHub-Api-Version: 2026-03-10",
        f"/repos/{repo_owner}/{repo}/releases?per_page=100",
    ]
    try:
        papi_release_info_json = subprocess.run(gh_command, check = True, capture_output = True, text = True)
    except subprocess.CalledProcessError as e:
        # Surface what gh reported, the exception itself only shows the exit status
        e.add_note(f"gh reported: {e.stderr.strip()}")
        raise
    papi_release_info_python = json.loads(papi_release_info_json.stdout)

    names_of_releases = []
    number_of_downloads_per_release = []
    # A list of dictionaries, one per release. The key "assets" holds a list
    # of dictionaries, each of which has a "download_count"
    for release_entry in reversed(papi_release_info_python):
        # A release without a title has a name of None, fall back to its tag
        names_of_releases.append(release_entry["name"] or release_entry["tag_name"])
        assets = release_entry["assets"]
        number_of_downloads_per_release.append(assets[0]["download_count"] if assets else 0)

    return len(names_of_releases), names_of_releases, number_of_downloads_per_release

def get_papi_unique_clones() -> tuple[int, list[str], list[int]]:
    """Get the number of unique clones for each timestamp reported by GitHub.

    The clone traffic is requested with the GitHub CLI (gh). Only the date
    part (the text before the "T") of each timestamp is kept.

    :returns: A tuple containing the number of timestamps, date of the timestamps, and number of unique clones per timestamp.
    :rtype: tuple
    :raises FileNotFoundError: If the gh executable cannot be found.
    :raises subprocess.CalledProcessError: If gh exits with a non-zero status.
    """
    repo_owner = "icl-utk-edu"
    repo = "papi"

    # The command is passed as an argument list so no shell is involved
    gh_command = [
        "gh", "api",
        "-H", "Accept: application/vnd.github+json",
        "-H", "X-GitHub-Api-Version: 2026-03-10",
        f"/repos/{repo_owner}/{repo}/traffic/clones",
    ]
    try:
        papi_traffic_json = subprocess.run(gh_command, check = True, capture_output = True, text = True)
    except subprocess.CalledProcessError as e:
        # Surface what gh reported, the exception itself only shows the exit status
        e.add_note(f"gh reported: {e.stderr.strip()}")
        raise
    papi_traffic_python = json.loads(papi_traffic_json.stdout)

    timestamps = []
    number_of_unique_clones = []
    # List of dictionaries containing the time stamps and the unique clone counts
    for clone_entry in papi_traffic_python["clones"]:
        date, _ = clone_entry["timestamp"].split("T")
        timestamps.append(date)
        number_of_unique_clones.append(clone_entry["uniques"])

    return len(timestamps), timestamps, number_of_unique_clones

def plot_papi_github_statistics(data_for_the_x_axis: list, data_for_the_y_axis: list, parsed_command_line_args: tuple) -> None:
    """Plot either the PAPI GitHub release download count or PAPI GitHub unique clones.
    Plots that can be generated include bar or line. Every requested plot is drawn
    on its own figure, saved under the filename paired with it and then closed.

    The title and the axis labels currently always describe release downloads.

    :param data_for_the_x_axis: Either a list of PAPI GitHub releases or timestamps in the format YYYY-MM-DD.
    :type data_for_x_axis: list
    :param data_for_the_y_axis: Either a list of PAPI GitHub release download counts or number of unique clones.
    :type data_for_y_axis: list
    :param parsed_command_line_args: A tuple containing the parsed arguments from the command line interface.
    :type parsed_command_line_args: tuple
    :raises NotImplementedError: If a requested plot is neither bar nor line.
    """
    # Unpack the tuple of arguments that were created in parse_args
    plots_to_generate, filenames, barplot_kwargs, lineplot_kwargs, scatterplot_kwargs, plots_fontsize, plots_figsize = parsed_command_line_args

    # Handle the range of the y-axis
    yaxis_stepsize = 300
    ## The max is taken from the data that was passed in, default keeps an
    ## empty data set from raising
    yaxis_current_max = max(data_for_the_y_axis, default = 0)
    ## The y-axis max is updated to be + yaxis_stepsize such that
    ## a value is placed at the top left corner of the plot
    yaxis_updated_max = yaxis_current_max + yaxis_stepsize

    for plot, filename in zip(plots_to_generate, filenames):
        # A new figure per plot, otherwise bar and line would share the same axes
        fig, ax = plt.subplots(figsize = plots_figsize)
        try:
            # Showcase the data via a barplot
            if plot == "bar":
                bars = ax.bar(data_for_the_x_axis, data_for_the_y_axis, **barplot_kwargs)
                ax.bar_label(bars, padding = 3, fontsize = plots_fontsize)
            # Showcase the data via a lineplot
            elif plot == "line":
                ax.plot(data_for_the_x_axis, data_for_the_y_axis, zorder = 0, **lineplot_kwargs)
                ax.scatter(data_for_the_x_axis, data_for_the_y_axis, zorder = 1, **scatterplot_kwargs)
                # Add annotations, the 70 lifts the text above the marker
                for name, count in zip(data_for_the_x_axis, data_for_the_y_axis):
                    ax.annotate(f"{count}", xy=(name, count + 70), ha = "center", fontsize = plots_fontsize)
            # Plot option has yet to be implemented
            else:
                raise NotImplementedError(f"The plot type {plot} has not been implemented.")

            # Handle the figures title
            plots_title = "The Number of Downloads per PAPI Release via GitHub"
            ax.set_title(plots_title, fontsize = plots_fontsize)

            # Handle the figures y-axis
            plots_ylabel = "Number of Downloads"
            ax.set_yticks(np.arange(0, yaxis_updated_max, yaxis_stepsize))
            ax.tick_params(axis = "y", labelsize = plots_fontsize)
            ax.set_ylabel(plots_ylabel, fontsize = plots_fontsize)

            # Handle the figures x-axis
            plots_xlabel = "PAPI Releases"
            ax.set_xlabel(plots_xlabel, fontsize = plots_fontsize)
            ax.tick_params(axis = "x", labelsize = plots_fontsize)

            # Save the figure
            fig.tight_layout()
            fig.savefig(filename)
        finally:
            # Release the figure even if plotting or saving failed
            plt.close(fig)

def parse_args(cmd_line_args: argparse.Namespace, number_of_papi_releases_on_gh: int) -> tuple:
    """Parse the command line interface args and assign defaults if necessary.

    Whitespace around the entries of the comma separated options
    (--filenames, --figsize and --colors) is ignored.

    :param cmd_line_args: The parsed command line options.
    :type cmd_line_args: argparse.Namespace
    :param number_of_papi_releases_on_gh: Number of entries that are being plotted. A list of colors must have this many entries and a colormap is sampled this many times.
    :type number_of_papi_releases_on_gh: int
    :returns: A tuple containing the plots to generate, the filenames, the barplot kwargs, the lineplot kwargs, the scatterplot kwargs, the fontsize, and the figsize as (width, height).
    :rtype: tuple
    :raises ValueError: If the number of filenames does not match the number of plots, --figsize is not in the format width_size,height_size, or the number of colors does not match the number of entries being plotted.
    :raises KeyError: If the name provided to --colors is not a colormap.
    """
    seperator = ","

    # Handle the arg --plot-to-generate
    ## Anything other than all or bar results in a lineplot
    if cmd_line_args.plot_to_generate == "all":
        list_of_plots = ["bar", "line"]
    elif cmd_line_args.plot_to_generate == "bar":
        list_of_plots = ["bar"]
    else:
        list_of_plots = ["line"]

    # Handle the arg --filenames
    ## If the argument --filenames has been provided then
    ## convert them to a list
    if cmd_line_args.filenames:
        list_of_filenames = [filename.strip() for filename in cmd_line_args.filenames.split(seperator)]
    ## If the argument --filenames has not been provided then
    ## create our own
    else:
        list_of_filenames = [f"number_of_downloads_per_papi_release_via_github_{plot}" for plot in list_of_plots]

    # Number of plots and number of filenames must match
    if len(list_of_plots) != len(list_of_filenames):
        raise ValueError(f"The number of plots ({len(list_of_plots)}) does not match then number of filenames ({len(list_of_filenames)})")

    # Handle the arg --figsize
    figsize_entries = cmd_line_args.figsize.split(seperator)
    if len(figsize_entries) != 2:
        raise ValueError(f"The value {cmd_line_args.figsize} provided to --figsize is not in the format width_size,height_size.")
    width, height = figsize_entries
    figsize = (int(width), int(height))

    # Handle the arg --colors
    colors = None
    if cmd_line_args.colors:
        ## A list of colors were provided
        if seperator in cmd_line_args.colors:
            colors = [color.strip() for color in cmd_line_args.colors.split(seperator)]
            if len(colors) != number_of_papi_releases_on_gh:
                raise ValueError(f"A total of {len(colors)} colors were provided to --colors, but {number_of_papi_releases_on_gh} releases are being plotted.")
        ## A single name was provided, treat it as a colormap
        else:
            try:
                colormap = mpl.colormaps[cmd_line_args.colors]
            except KeyError as e:
                e.add_note(f"The colormap {cmd_line_args.colors} provided to --colors is not actually a colormap.")
                raise
            ## Sample one color per entry being plotted
            colors = colormap(np.linspace(0, 1, number_of_papi_releases_on_gh))

    # Handle the arg --barplot-kwargs
    ## If the argument --barplot-kwargs has been provided then
    ## convert the JSON to a Python dictionary to be used
    if cmd_line_args.barplot_kwargs:
        barplot_kwargs = json.loads(cmd_line_args.barplot_kwargs)
    ## If the argument --barplot-kwargs has not been provided then
    ## use default
    else:
        barplot_kwargs = {
            "edgecolor": "black",
            "linewidth": 1.5,
            "color": colors,
        }

    # Handle the arg --lineplot-kwargs
    ## If the argument --lineplot-kwargs has been provided then
    ## convert the JSON to a Python dictionary to be used
    if cmd_line_args.lineplot_kwargs:
        lineplot_kwargs = json.loads(cmd_line_args.lineplot_kwargs)
    ## If the argument --lineplot-kwargs has not been provided then
    ## use default
    else:
        lineplot_kwargs = {
            "linewidth": 3,
            "color": "black",
        }

    # Handle the arg --scatterplot-kwargs
    ## If the argument --scatterplot-kwargs has been provided then
    ## convert the JSON to a Python dictionary to be used
    if cmd_line_args.scatterplot_kwargs:
        scatterplot_kwargs = json.loads(cmd_line_args.scatterplot_kwargs)
    ## If the argument --scatterplot-kwargs has not been provided then
    ## use default
    else:
        scatterplot_kwargs = {
            "marker": "o",
            "s": 200,
            "color": colors,
            "edgecolor": "black",
            "linewidth": 2,
        }

    return list_of_plots, list_of_filenames, barplot_kwargs, lineplot_kwargs, scatterplot_kwargs, cmd_line_args.fontsize, figsize


#TODO: I will just make this an action.
if __name__ == "__main__":
    # Read the command line first, such that --help and malformed options
    # are handled before a request is sent to GitHub
    cmd_line_args = setup_args().parse_args()

    # Get the GitHub release metadata
    number_of_papi_releases_on_gh, names_of_papi_releases, number_of_downloads_per_release = get_papi_release_download_count()

    #number_of_entries, timestamps, clones = get_papi_unique_clones()

    # Parse the arguments, the number of releases is needed to validate
    # or sample the colors
    tuple_of_command_line_args = parse_args(cmd_line_args, number_of_papi_releases_on_gh)
    #tuple_of_command_line_args = parse_args(cmd_line_args, number_of_entries)

    #plot_papi_github_statistics(timestamps, clones, tuple_of_command_line_args)
    plot_papi_github_statistics(names_of_papi_releases, number_of_downloads_per_release, tuple_of_command_line_args)


Loading