Manual backport fix github quota (#50827)
datapythonista committed Jan 18, 2023
1 parent c2ab902 commit 5aff5c9
Showing 4 changed files with 63 additions and 55 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/docbuild-and-upload.yml
@@ -45,12 +45,6 @@ jobs:
       - name: Build Pandas
         uses: ./.github/actions/build_pandas
 
-      - name: Set up maintainers cache
-        uses: actions/cache@v3
-        with:
-          path: maintainers.json
-          key: maintainers
-
       - name: Build website
         run: python web/pandas_web.py web/pandas --target-path=web/build
6 changes: 4 additions & 2 deletions web/pandas/about/team.md
@@ -9,7 +9,8 @@ If you want to support pandas development, you can find information in the [dona
 ## Active maintainers
 
 <div class="card-group maintainers">
-    {% for person in maintainers.active_with_github_info %}
+    {% for username in maintainers.active %}
+    {% set person = maintainers.github_info.get(username) %}
     <div class="card">
         <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
         <div class="card-body">
@@ -63,7 +64,8 @@ The project governance is available in the [project governance page](governance.
 ## Inactive maintainers
 
 <ul>
-    {% for person in maintainers.inactive_with_github_info %}
+    {% for username in maintainers.inactive %}
+    {% set person = maintainers.github_info.get(username) %}
     <li>
         <a href="{{ person.blog or person.html_url }}">
             {{ person.name or person.login }}
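
For illustration (not from the commit): a minimal sketch of how the reworked template resolves each username against the shared github_info mapping, assuming jinja2 is available; the maintainer data is made up.

from jinja2 import Template

template = Template(
    "{% for username in maintainers.active %}"
    "{% set person = maintainers.github_info.get(username) %}"
    "{{ person.name or person.login }}\n"
    "{% endfor %}"
)

# illustrative context, mirroring the shape that maintainers_add_info
# (see web/pandas_web.py below) stores under context["maintainers"]
context = {
    "maintainers": {
        "active": ["someuser"],
        "github_info": {"someuser": {"login": "someuser", "name": "Some User"}},
    }
}
print(template.render(**context))  # -> Some User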
2 changes: 1 addition & 1 deletion web/pandas/config.yml
@@ -1,10 +1,10 @@
 main:
   templates_path: _templates
   base_template: "layout.html"
+  production_url: "https://pandas.pydata.org/"
   ignore:
     - _templates/layout.html
     - config.yml
     - try.md  # the binder page will be added later
   github_repo_url: pandas-dev/pandas
   context_preprocessors:
     - pandas_web.Preprocessors.current_year
104 changes: 58 additions & 46 deletions web/pandas_web.py
@@ -158,35 +158,39 @@ def maintainers_add_info(context):
         Given the active maintainers defined in the yaml file, it fetches
         the GitHub user information for them.
         """
-        timestamp = time.time()
-
-        cache_file = pathlib.Path("maintainers.json")
-        if cache_file.is_file():
-            with open(cache_file) as f:
-                context["maintainers"] = json.load(f)
-            # refresh cache after 1 hour
-            if (timestamp - context["maintainers"]["timestamp"]) < 3_600:
-                return context
-
-        context["maintainers"]["timestamp"] = timestamp
-
         repeated = set(context["maintainers"]["active"]) & set(
             context["maintainers"]["inactive"]
         )
         if repeated:
             raise ValueError(f"Maintainers {repeated} are both active and inactive")
 
-        for kind in ("active", "inactive"):
-            context["maintainers"][f"{kind}_with_github_info"] = []
-            for user in context["maintainers"][kind]:
-                resp = requests.get(f"https://api.github.com/users/{user}")
-                if context["ignore_io_errors"] and resp.status_code == 403:
-                    return context
-                resp.raise_for_status()
-                context["maintainers"][f"{kind}_with_github_info"].append(resp.json())
+        maintainers_info = {}
+        for user in (
+            context["maintainers"]["active"] + context["maintainers"]["inactive"]
+        ):
+            resp = requests.get(f"https://api.github.com/users/{user}")
+            if resp.status_code == 403:
+                sys.stderr.write(
+                    "WARN: GitHub API quota exceeded when fetching maintainers\n"
+                )
+                # if we exceed the github api quota, we use the github info
+                # of maintainers saved with the website
+                resp_bkp = requests.get(
+                    context["main"]["production_url"] + "maintainers.json"
+                )
+                resp_bkp.raise_for_status()
+                maintainers_info = resp_bkp.json()
+                break
+
+            resp.raise_for_status()
+            maintainers_info[user] = resp.json()
 
-        with open(cache_file, "w") as f:
-            json.dump(context["maintainers"], f)
+        context["maintainers"]["github_info"] = maintainers_info
+
+        # save the data fetched from github to use it in case we exceed
+        # the github api quota in the future
+        with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w") as f:
+            json.dump(maintainers_info, f)
 
         return context

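The hunk above drops the local maintainers.json cache in favor of a production fallback. A hedged sketch of that pattern in isolation (fetch_with_fallback is a hypothetical helper, not from the commit): try the GitHub API first, and on a 403 quota error reuse the JSON snapshot published with the previous build of the website.

import sys

import requests

PRODUCTION_URL = "https://pandas.pydata.org/"  # production_url from config.yml above


def fetch_with_fallback(api_url, snapshot_name):
    # try the live API first; on 403 (quota exceeded) fall back to the
    # snapshot served from the production website
    resp = requests.get(api_url)
    if resp.status_code == 403:
        sys.stderr.write(f"WARN: GitHub API quota exceeded, using {snapshot_name}\n")
        resp = requests.get(PRODUCTION_URL + snapshot_name)
    resp.raise_for_status()
    return resp.json()


releases = fetch_with_fallback(
    "https://api.github.com/repos/pandas-dev/pandas/releases", "releases.json"
)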
@@ -196,11 +200,19 @@ def home_add_releases(context):
 
         github_repo_url = context["main"]["github_repo_url"]
         resp = requests.get(f"https://api.github.com/repos/{github_repo_url}/releases")
-        if context["ignore_io_errors"] and resp.status_code == 403:
-            return context
-        resp.raise_for_status()
+        if resp.status_code == 403:
+            sys.stderr.write("WARN: GitHub API quota exceeded when fetching releases\n")
+            resp_bkp = requests.get(context["main"]["production_url"] + "releases.json")
+            resp_bkp.raise_for_status()
+            releases = resp_bkp.json()
+        else:
+            resp.raise_for_status()
+            releases = resp.json()
 
-        for release in resp.json():
+        with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f:
+            json.dump(releases, f, default=datetime.datetime.isoformat)
+
+        for release in releases:
             if release["prerelease"]:
                 continue
             published = datetime.datetime.strptime(
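
A note on the new json.dump call above: the standard encoder cannot serialize datetime objects, and the default= hook supplies a converter for any value it cannot handle, which is what default=datetime.datetime.isoformat provides. A quick stdlib illustration with made-up data:

import datetime
import json

release = {"tag_name": "v1.5.3", "published": datetime.datetime(2023, 1, 18)}

# json.dumps(release) alone raises TypeError: not JSON serializable
print(json.dumps(release, default=datetime.datetime.isoformat))
# {"tag_name": "v1.5.3", "published": "2023-01-18T00:00:00"}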
@@ -218,6 +230,7 @@
                     ),
                 }
             )
+
         return context
 
     @staticmethod
@@ -264,12 +277,20 @@ def roadmap_pdeps(context):
             "https://api.github.com/search/issues?"
             f"q=is:pr is:open label:PDEP repo:{github_repo_url}"
         )
-        if context["ignore_io_errors"] and resp.status_code == 403:
-            return context
-        resp.raise_for_status()
+        if resp.status_code == 403:
+            sys.stderr.write("WARN: GitHub API quota exceeded when fetching pdeps\n")
+            resp_bkp = requests.get(context["main"]["production_url"] + "pdeps.json")
+            resp_bkp.raise_for_status()
+            pdeps = resp_bkp.json()
+        else:
+            resp.raise_for_status()
+            pdeps = resp.json()
+
+        with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w") as f:
+            json.dump(pdeps, f)
 
-        for pdep in resp.json()["items"]:
-            context["pdeps"]["under_discussion"].append(
+        for pdep in pdeps["items"]:
+            context["pdeps"]["Under discussion"].append(
                 {"title": pdep["title"], "url": pdep["url"]}
             )
 
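Unlike the releases endpoint, the search endpoint wraps its results in an envelope, which is why the code indexes pdeps["items"]. A minimal standalone sketch of the same query (only the fields the preprocessor uses are shown):

import requests

resp = requests.get(
    "https://api.github.com/search/issues?"
    "q=is:pr is:open label:PDEP repo:pandas-dev/pandas"
)
resp.raise_for_status()
payload = resp.json()  # envelope: {"total_count": ..., "incomplete_results": ..., "items": [...]}
for pdep in payload["items"]:
    print(pdep["title"], pdep["url"])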
@@ -302,7 +323,7 @@ def get_callable(obj_as_str: str) -> object:
     return obj
 
 
-def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
+def get_context(config_fname: str, **kwargs):
     """
     Load the config yaml as the base context, and enrich it with the
     information added by the context preprocessors defined in the file.
@@ -311,7 +332,6 @@ def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
         context = yaml.safe_load(f)
 
     context["source_path"] = os.path.dirname(config_fname)
-    context["ignore_io_errors"] = ignore_io_errors
     context.update(kwargs)
 
     preprocessors = (
@@ -349,7 +369,9 @@ def extend_base_template(content: str, base_template: str) -> str:
 
 
 def main(
-    source_path: str, target_path: str, base_url: str, ignore_io_errors: bool
+    source_path: str,
+    target_path: str,
+    base_url: str,
 ) -> int:
     """
     Copy every file in the source directory to the target directory.
@@ -363,7 +385,7 @@ def main(
     os.makedirs(target_path, exist_ok=True)
 
     sys.stderr.write("Generating context...\n")
-    context = get_context(config_fname, ignore_io_errors, base_url=base_url)
+    context = get_context(config_fname, base_url=base_url, target_path=target_path)
     sys.stderr.write("Context generated\n")
 
     templates_path = os.path.join(source_path, context["main"]["templates_path"])
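
How target_path reaches the preprocessors: get_context merges extra keyword arguments into the YAML context, so target_path=target_path above becomes context["target_path"]. A simplified sketch of that merge, trimmed from the function shown earlier:

import yaml


def get_context(config_fname, **kwargs):
    # the YAML config is the base context; extra kwargs become top-level keys
    with open(config_fname) as f:
        context = yaml.safe_load(f)
    context.update(kwargs)  # e.g. base_url, target_path
    return context


# get_context("web/pandas/config.yml", base_url="", target_path="web/build")
# would expose context["target_path"] == "web/build" to every preprocessor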
@@ -407,15 +429,5 @@ def main(
     parser.add_argument(
         "--base-url", default="", help="base url where the website is served from"
     )
-    parser.add_argument(
-        "--ignore-io-errors",
-        action="store_true",
-        help="do not fail if errors happen when fetching "
-        "data from http sources, and those fail "
-        "(mostly useful to allow github quota errors "
-        "when running the script locally)",
-    )
     args = parser.parse_args()
-    sys.exit(
-        main(args.source_path, args.target_path, args.base_url, args.ignore_io_errors)
-    )
+    sys.exit(main(args.source_path, args.target_path, args.base_url))
