name: PAPI GitHub Statistics

# NOTE: the plotting script also accepts --filenames ("Name of file(s) to save
# the plots under"; two comma separated names are needed when plotting "all"),
# but that option is not exposed as a workflow input yet.

on:
  workflow_dispatch:
    inputs:
      starting_release:
        type: string
        description: "The release you want to begin plotting from. Default is 0 and corresponds to the first PAPI GitHub release."
        default: "0"
        required: false
      plot_to_generate:
        type: choice
        description: "Type of plot to generate. Default is all."
        default: "all"
        options:
          - all
          - bar
          - line
        required: false
      figsize:
        type: string
        description: "Size of the generated figure(s). Must be in the format width_size,height_size."
        default: "12,5"
        required: false
      colors:
        type: string
        description: "Colors to be used in the generated plot. You can pass a colormap or list of colors. In the case of a list of colors, they must be comma separated and the number provided must match the number of releases you wish to plot."
        default: "viridis"
        required: false
      fontsize:
        type: string
        description: "Fontsizes for the plots x and y ticks/labels and title."
        default: "14"
        required: false
      barplot_kwargs:
        type: string
        description: "Keyword arguments for the generated barplot, given as a JSON object."
        required: false
      lineplot_kwargs:
        type: string
        description: "Keyword arguments for the generated lineplot, given as a JSON object."
        required: false
      scatterplot_kwargs:
        type: string
        description: "Keyword arguments for the generated scatterplot, given as a JSON object."
        required: false

jobs:
  get_papi_github_statistics:
    runs-on: ubuntu-latest
    steps:
      - name: Setup Python Dependency
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"

      - name: Install Matplotlib
        run: |
          pip install matplotlib

      - name: Checkout
        uses: actions/checkout@v6

      - name: Plot PAPI GitHub Statistics
        # The workflow inputs are handed over through environment variables
        # instead of being expanded inside the script text. This keeps user
        # supplied text (for example JSON containing quotes) from being
        # interpreted by the shell.
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          STARTING_RELEASE: ${{ github.event.inputs.starting_release }}
          PLOT_TO_GENERATE: ${{ github.event.inputs.plot_to_generate }}
          FIGSIZE: ${{ github.event.inputs.figsize }}
          COLORS: ${{ github.event.inputs.colors }}
          FONTSIZE: ${{ github.event.inputs.fontsize }}
          BARPLOT_KWARGS: ${{ github.event.inputs.barplot_kwargs }}
          LINEPLOT_KWARGS: ${{ github.event.inputs.lineplot_kwargs }}
          SCATTERPLOT_KWARGS: ${{ github.event.inputs.scatterplot_kwargs }}
        run: |
          args=(
            --starting-release "$STARTING_RELEASE"
            --plot-to-generate "$PLOT_TO_GENERATE"
            --figsize "$FIGSIZE"
            --colors "$COLORS"
            --fontsize "$FONTSIZE"
          )
          # The keyword argument inputs have no default, so they are only
          # forwarded when a value was entered; otherwise the script falls
          # back to its built-in plot styling.
          if [ -n "$BARPLOT_KWARGS" ]; then
            args+=(--barplot-kwargs "$BARPLOT_KWARGS")
          fi
          if [ -n "$LINEPLOT_KWARGS" ]; then
            args+=(--lineplot-kwargs "$LINEPLOT_KWARGS")
          fi
          if [ -n "$SCATTERPLOT_KWARGS" ]; then
            args+=(--scatterplot-kwargs "$SCATTERPLOT_KWARGS")
          fi
          python3 .github/workflows_scripts/ga_papi_github_download_count_and_unique_clones.py "${args[@]}"
#!/usr/bin/env python3

import argparse
import datetime
import json
import subprocess

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np


'''
Notes:
Returns a list of releases
Seems that with json it can have multiple sections
Have to index into the sections
'''

#TODO: 1. Incorporate starting release argument, how can I properly offset the annotation? Can the annotation be used for the barplot as well?
Incorporate starting release argument, how can I properly offset the annotation? Can the annotation be used for the barplot as well? + +def setup_args() -> argparse.ArgumentParser: + """Setup the command line interface. + + :returns: An instance of argparse.ArgumentParser. + :rtype: argparse.ArgumentParser + """ + parser = argparse.ArgumentParser() + parser.add_argument("--starting-release", default = 0, help = "The release you want to begin plotting from. Default is 0 and correponds to the first PAPI GitHub release.") + parser.add_argument("--plot-to-generate", default = "all", help = "Type of plot to generate. Options include [all (default), bar, or line].") + parser.add_argument("--figsize", default = "12,5", help = "Size of the generated figure. Must be in the format width_size,height_size.") + parser.add_argument("--colors", default = "viridis", help = "Colors to be used in the generated plot. You can pass a colormap or list of colors. In the case of a list of colors, they must be comma separated and the number provided must match the number of releases you wish to plot.") + parser.add_argument("--fontsize", default = 14, help = "Fontsizes for the plots x and y ticks/labels and title.") + parser.add_argument("--filenames", default = None, help = "Name of file(s) to save the plots under. If all is provided to --plots-to-generate you must provide TWO filenames and list them as comma separated.") + parser.add_argument("--barplot-kwargs", default = None, help = "Keyword arguments for the generated barplot.") + parser.add_argument("--lineplot-kwargs", default = None, help = "Keyword arguments for the generated lineplot.") + parser.add_argument("--scatterplot-kwargs", default = None, help = "Keyword arguments for the generated scatterplot.") + + return parser + +def get_papi_release_download_count() -> tuple[int, list[str], list[int]]: + """For each PAPI release on GitHub get the download count. 
+ + :returns: A tuple containing number of releases, name of releases, and the download count per release. + :rtype: tuple + """ + repo_owner = "icl-utk-edu" + repo = "papi" + + #TODO: Need to load gh, I will have to check to make sure that a user has this loaded. + #TODO: Maybe make it as a list? + papi_release_info_json = subprocess.run(f"gh api -H 'Accept: application/vnd.github+json' -H 'X-GitHub-Api-Version: 2026-03-10' /repos/{repo_owner}/{repo}/releases", shell = True, capture_output = True, text = True) + papi_release_info_python = json.loads(papi_release_info_json.stdout) + + names_of_releases = [] + number_of_downloads_per_release = [] + #a list, that contains a dictionary, that dictionary has keys, a key is assets, which contains a list, which contains another dictionary + for release_entry in papi_release_info_python: + names_of_releases.insert(0, release_entry["name"]) + number_of_downloads_per_release.insert(0, release_entry["assets"][0]["download_count"]) + + return len(names_of_releases), names_of_releases, number_of_downloads_per_release + +def get_papi_unique_clones() -> tuple[int, list[str], list[int]]: + """For the last 14 days get the total number of unique clones. + + :returns: A tuple containing the number of timestamps, data of the timestamps, and number of unique clones per timestamp. 
+ :rtype: tuple + """ + repo_owner = "icl-utk-edu" + repo = "papi" + + papi_traffic_json = subprocess.run(f"gh api -H 'Accept: application/vnd.github+json' -H 'X-GitHub-Api-Version: 2026-03-10' /repos/{repo_owner}/{repo}/traffic/clones", shell = True, capture_output = True, text = True) + papi_traffic_python = json.loads(papi_traffic_json.stdout) + + print(papi_traffic_python) + + timestamps = [] + number_of_unique_clones = [] + # List of dictionaries containing the time stamps and the unique download counts + for clone_entry in papi_traffic_python["clones"]: + date, _ = clone_entry["timestamp"].split("T") + timestamps.append(date) + number_of_unique_clones.append(clone_entry["uniques"]) + + return len(timestamps), timestamps, number_of_unique_clones + +def plot_papi_github_statistics(data_for_the_x_axis: list, data_for_the_y_axis: list, parsed_command_line_args: tuple) -> None: + """Plot either the PAPI GitHub release download count or PAPI GitHub unique clones. + Plots that can be generated include bar or line. + + :param data_for_the_x_axis: Either a list of PAPI GitHub releases or timestamps in the format YYYY-MM-DD. + :type data_for_x_axis: list + :param data_for_the_y_axis: Either a list of PAPI GitHub release download counts or number of unique clones. + :type data_for_y_axis: list + :param parsed_command_line_args: A tuple containing the parsed arguments from the command line interface. 
def plot_papi_github_statistics(
    data_for_the_x_axis: list,
    data_for_the_y_axis: list,
    parsed_command_line_args: tuple,
    *,
    plots_title: str = "The Number of Downloads per PAPI Release via GitHub",
    plots_xlabel: str = "PAPI Releases",
    plots_ylabel: str = "Number of Downloads",
    yaxis_stepsize: int = 300,
) -> None:
    """Plot either the PAPI GitHub release download count or PAPI GitHub unique clones.
    Plots that can be generated include bar or line. Every requested plot is
    drawn on its own figure and saved under its matching filename.

    :param data_for_the_x_axis: Either a list of PAPI GitHub releases or timestamps in the format YYYY-MM-DD.
    :type data_for_the_x_axis: list
    :param data_for_the_y_axis: Either a list of PAPI GitHub release download counts or number of unique clones.
    :type data_for_the_y_axis: list
    :param parsed_command_line_args: A tuple containing the parsed arguments from the command line interface,
        in the order: plots to generate, filenames, barplot kwargs, lineplot kwargs, scatterplot kwargs,
        fontsize, figsize.
    :type parsed_command_line_args: tuple
    :param plots_title: Title placed above each plot.
    :type plots_title: str
    :param plots_xlabel: Label of the x-axis.
    :type plots_xlabel: str
    :param plots_ylabel: Label of the y-axis.
    :type plots_ylabel: str
    :param yaxis_stepsize: Distance between two ticks on the y-axis.
    :type yaxis_stepsize: int
    :raises NotImplementedError: If a requested plot type is neither bar nor line.
    """
    # Unpack the tuple of arguments that were created in parse_args
    (plots_to_generate, filenames, barplot_kwargs, lineplot_kwargs,
     scatterplot_kwargs, plots_fontsize, plots_figsize) = parsed_command_line_args

    # Vertical distance, in data units, between a marker and its annotation.
    annotation_offset = 70

    # The y-axis max is derived from the data handed to this function and is
    # raised by one step so that the topmost tick lies at or above the largest
    # value. An empty data set is treated as a maximum of 0.
    yaxis_updated_max = max(data_for_the_y_axis, default=0) + yaxis_stepsize

    for plot, filename in zip(plots_to_generate, filenames):
        if plot not in ("bar", "line"):
            # Plot option has yet to be implemented
            raise NotImplementedError(f"The plot type {plot!r} has yet to be implemented.")

        # A fresh figure per plot keeps the bar and line plots from being
        # drawn on top of each other when both are requested.
        fig, ax = plt.subplots(figsize=plots_figsize)

        if plot == "bar":
            # Showcase the data via a barplot
            bars = ax.bar(data_for_the_x_axis, data_for_the_y_axis, **barplot_kwargs)
            ax.bar_label(bars, padding=3, fontsize=plots_fontsize)
        else:
            # Showcase the data via a lineplot with the markers drawn on top
            ax.plot(data_for_the_x_axis, data_for_the_y_axis, zorder=0, **lineplot_kwargs)
            ax.scatter(data_for_the_x_axis, data_for_the_y_axis, zorder=1, **scatterplot_kwargs)
            # Add annotations
            for name, count in zip(data_for_the_x_axis, data_for_the_y_axis):
                ax.annotate(f"{count}", xy=(name, count + annotation_offset), ha="center", fontsize=plots_fontsize)

        # Handle the figures title
        ax.set_title(plots_title, fontsize=plots_fontsize)

        # Handle the figures y-axis
        ax.set_yticks(np.arange(0, yaxis_updated_max, yaxis_stepsize))
        ax.tick_params(axis="y", labelsize=plots_fontsize)
        ax.set_ylabel(plots_ylabel, fontsize=plots_fontsize)

        # Handle the figures x-axis
        ax.set_xlabel(plots_xlabel, fontsize=plots_fontsize)
        ax.tick_params(axis="x", labelsize=plots_fontsize)

        # Save the figure, then release it so figures do not pile up in memory
        fig.tight_layout()
        fig.savefig(filename)
        plt.close(fig)
#fig.savefig(filename) + +def parse_args(cmd_line_args: argparse.Namespace, number_of_papi_releases_on_gh: int): + """Parse the command line interface args and assign defaults if necessary. + + :param cmd_line_args + :type cmd_line_args: argparse.Namespace + :param + :type + :returns: + :rtype: + """ + # Handle the arg --plot-to-generate + list_of_plots = None + if cmd_line_args.plot_to_generate == "all": + list_of_plots = ["bar", "line"] + elif cmd_line_args.plot_to_generate == "bar": + list_of_plots = ["bar"] + else: + list_of_plots = ["line"] + + # Handle the arg --filenames + ## If the argument --filenames has been provided then + ## convert them to a list + list_of_filenames = [] + if cmd_line_args.filenames: + seperator = "," + for filename in cmd_line_args.filenames.split(seperator): + list_of_filenames.append(filename) + ## If the argument --filename has not been provided then + ## create our own + else: + for plot in list_of_plots: + list_of_filenames.append(f"number_of_downloads_per_papi_release_via_github_{plot}") + + # Number of plots and number of filenames must match + if len(list_of_plots) != len(list_of_filenames): + raise ValueError(f"The number of plots ({len(list_of_plots)}) does not match then number of filenames ({len(list_of_filenames)})") + + # Handle the arg --figsize + seperator = "," + width, height = cmd_line_args.figsize.split(seperator) + figsize = (int(width), int(height)) + + + colors = None + # Handle the arg --colors + if cmd_line_args.colors: + ## A list of colors were provided + if "," in cmd_line_args.colors: + seperator = "," + colors = cmd_line_args.colors.split(seperator) + if len(colors) != number_of_papi_releases_on_gh: + raise ValueError(f"A total of {len(colors)} colors were provided to --colors, but {number_of_papi_releases_on_gh} releases are being plotted.") + else: + try: + cmp = mpl.colormaps[cmd_line_args.colors] + except KeyError as e: + e.add_note(f"The colormap {cmd_line_args.colors} provided to --colors is not 
actually a colormap.") + raise + colors = cmp(np.linspace(0, 1, number_of_papi_releases_on_gh)) + + # Handle the arg --barplot-kwargs + barplot_kwargs = None + ## If the argument --barplot-kwargs has been provied then + ## convert the JSON to a Python dictionary to be used + if cmd_line_args.barplot_kwargs: + barplot_kwargs = json.loads(cmd_line_args.barplot_kwargs) + # If the argument --barplot-kwargs has not been provied then + # use default + else: + barplot_kwargs = { + "edgecolor": "black", + "linewidth": 1.5, + "color": colors, + } + + # Handle the arg --lineplot-kwargs + lineplot_kwargs = None + ## If the argument --lineplot-kwargs has been provied then + ## convert the JSON to a Python dictionary to be used + if cmd_line_args.lineplot_kwargs: + lineplot_kwargs = json.loads(cmd_line_args.lineplot_kwargs) + ## If the argument --lineplot-kwargs has not been provied then + ## use default + else: + lineplot_kwargs = { + "linewidth": 3, + "color": "black", + } + + # Handle the arg --scatterplot-kwargs + scatterplot_kwargs = None + ## If the argument --scatterplot-kwargs has been provied then + ## convert the JSON to a Python dictionary to be used + if cmd_line_args.scatterplot_kwargs: + scatterplot_kwargs = json.loads(cmd_line_args.scatterplot_kwargs) + ## If the argument --scatterplot-kwargs has not been provied then + ## use default + else: + scatterplot_kwargs = { + "marker": "o", + "s": 200, + "color": colors, + "edgecolor": "black", + "linewidth": 2, + } + + return list_of_plots, list_of_filenames, barplot_kwargs, lineplot_kwargs, scatterplot_kwargs, cmd_line_args.fontsize, figsize + + +#TODO: I will just make this an action. 
if __name__ == "__main__":
    # Parse the command line before talking to GitHub so that --help and
    # malformed options are handled without issuing an API request.
    parser = setup_args()
    cmd_line_namespace = parser.parse_args()

    # --starting-release may arrive as text, so normalise it to an int.
    try:
        starting_release = int(cmd_line_namespace.starting_release)
    except ValueError:
        parser.error(f"--starting-release must be an integer, but got: {cmd_line_namespace.starting_release}")

    # Get the GitHub release metadata.
    # NOTE: the results stay bound at module scope under these names (rather
    # than inside a main() function) because other code in this file may read
    # them as globals.
    number_of_papi_releases_on_gh, names_of_papi_releases, number_of_downloads_per_release = get_papi_release_download_count()

    # Honor --starting-release by dropping every release in front of it. The
    # release count is updated as well, since it decides how many colors are
    # needed for the plot.
    if not 0 <= starting_release <= max(number_of_papi_releases_on_gh - 1, 0):
        parser.error(
            f"--starting-release must be between 0 and {max(number_of_papi_releases_on_gh - 1, 0)}, but got: {starting_release}"
        )
    names_of_papi_releases = names_of_papi_releases[starting_release:]
    number_of_downloads_per_release = number_of_downloads_per_release[starting_release:]
    number_of_papi_releases_on_gh = len(names_of_papi_releases)

    # Assign defaults and convert the parsed arguments
    tuple_of_command_line_args = parse_args(cmd_line_namespace, number_of_papi_releases_on_gh)

    plot_papi_github_statistics(names_of_papi_releases, number_of_downloads_per_release, tuple_of_command_line_args)

    # TODO: the unique clone statistics are not plotted yet. The intended calls are:
    #   number_of_entries, timestamps, clones = get_papi_unique_clones()
    #   tuple_of_command_line_args = parse_args(cmd_line_namespace, number_of_entries)
    #   plot_papi_github_statistics(timestamps, clones, tuple_of_command_line_args)