Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Manual backport fix github quota #50827

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 0 additions & 6 deletions .github/workflows/docbuild-and-upload.yml
Expand Up @@ -45,12 +45,6 @@ jobs:
- name: Build Pandas
uses: ./.github/actions/build_pandas

- name: Set up maintainers cache
uses: actions/cache@v3
with:
path: maintainers.json
key: maintainers

- name: Build website
run: python web/pandas_web.py web/pandas --target-path=web/build

Expand Down
6 changes: 4 additions & 2 deletions web/pandas/about/team.md
Expand Up @@ -9,7 +9,8 @@ If you want to support pandas development, you can find information in the [dona
## Active maintainers

<div class="card-group maintainers">
{% for person in maintainers.active_with_github_info %}
{% for username in maintainers.active %}
{% set person = maintainers.github_info.get(username) %}
<div class="card">
<img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
<div class="card-body">
Expand Down Expand Up @@ -63,7 +64,8 @@ The project governance is available in the [project governance page](governance.
## Inactive maintainers

<ul>
{% for person in maintainers.inactive_with_github_info %}
{% for username in maintainers.inactive %}
{% set person = maintainers.github_info.get(username) %}
<li>
<a href="{{ person.blog or person.html_url }}">
{{ person.name or person.login }}
Expand Down
2 changes: 1 addition & 1 deletion web/pandas/config.yml
@@ -1,10 +1,10 @@
main:
templates_path: _templates
base_template: "layout.html"
production_url: "https://pandas.pydata.org/"
ignore:
- _templates/layout.html
- config.yml
- try.md # the binder page will be added later
github_repo_url: pandas-dev/pandas
context_preprocessors:
- pandas_web.Preprocessors.current_year
Expand Down
104 changes: 58 additions & 46 deletions web/pandas_web.py
Expand Up @@ -158,35 +158,39 @@ def maintainers_add_info(context):
Given the active maintainers defined in the yaml file, it fetches
the GitHub user information for them.
"""
timestamp = time.time()

cache_file = pathlib.Path("maintainers.json")
if cache_file.is_file():
with open(cache_file) as f:
context["maintainers"] = json.load(f)
# refresh cache after 1 hour
if (timestamp - context["maintainers"]["timestamp"]) < 3_600:
return context

context["maintainers"]["timestamp"] = timestamp

repeated = set(context["maintainers"]["active"]) & set(
context["maintainers"]["inactive"]
)
if repeated:
raise ValueError(f"Maintainers {repeated} are both active and inactive")

for kind in ("active", "inactive"):
context["maintainers"][f"{kind}_with_github_info"] = []
for user in context["maintainers"][kind]:
resp = requests.get(f"https://api.github.com/users/{user}")
if context["ignore_io_errors"] and resp.status_code == 403:
return context
resp.raise_for_status()
context["maintainers"][f"{kind}_with_github_info"].append(resp.json())
maintainers_info = {}
for user in (
context["maintainers"]["active"] + context["maintainers"]["inactive"]
):
resp = requests.get(f"https://api.github.com/users/{user}")
if resp.status_code == 403:
sys.stderr.write(
"WARN: GitHub API quota exceeded when fetching maintainers\n"
)
# if we exceed github api quota, we use the github info
# of maintainers saved with the website
resp_bkp = requests.get(
context["main"]["production_url"] + "maintainers.json"
)
resp_bkp.raise_for_status()
maintainers_info = resp_bkp.json()
break

resp.raise_for_status()
maintainers_info[user] = resp.json()

with open(cache_file, "w") as f:
json.dump(context["maintainers"], f)
context["maintainers"]["github_info"] = maintainers_info

    # save the data fetched from GitHub to use it in case we exceed
    # the GitHub API quota in the future
with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w") as f:
json.dump(maintainers_info, f)

return context

Expand All @@ -196,11 +200,19 @@ def home_add_releases(context):

github_repo_url = context["main"]["github_repo_url"]
resp = requests.get(f"https://api.github.com/repos/{github_repo_url}/releases")
if context["ignore_io_errors"] and resp.status_code == 403:
return context
resp.raise_for_status()
if resp.status_code == 403:
sys.stderr.write("WARN: GitHub API quota exceeded when fetching releases\n")
resp_bkp = requests.get(context["main"]["production_url"] + "releases.json")
resp_bkp.raise_for_status()
releases = resp_bkp.json()
else:
resp.raise_for_status()
releases = resp.json()

for release in resp.json():
with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f:
json.dump(releases, f, default=datetime.datetime.isoformat)

for release in releases:
if release["prerelease"]:
continue
published = datetime.datetime.strptime(
Expand All @@ -218,6 +230,7 @@ def home_add_releases(context):
),
}
)

return context

@staticmethod
Expand Down Expand Up @@ -264,12 +277,20 @@ def roadmap_pdeps(context):
"https://api.github.com/search/issues?"
f"q=is:pr is:open label:PDEP repo:{github_repo_url}"
)
if context["ignore_io_errors"] and resp.status_code == 403:
return context
resp.raise_for_status()
if resp.status_code == 403:
sys.stderr.write("WARN: GitHub API quota exceeded when fetching pdeps\n")
resp_bkp = requests.get(context["main"]["production_url"] + "pdeps.json")
resp_bkp.raise_for_status()
pdeps = resp_bkp.json()
else:
resp.raise_for_status()
pdeps = resp.json()

with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w") as f:
json.dump(pdeps, f)

for pdep in resp.json()["items"]:
context["pdeps"]["under_discussion"].append(
for pdep in pdeps["items"]:
context["pdeps"]["Under discussion"].append(
{"title": pdep["title"], "url": pdep["url"]}
)

Expand Down Expand Up @@ -302,7 +323,7 @@ def get_callable(obj_as_str: str) -> object:
return obj


def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
def get_context(config_fname: str, **kwargs):
"""
Load the config yaml as the base context, and enrich it with the
information added by the context preprocessors defined in the file.
Expand All @@ -311,7 +332,6 @@ def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
context = yaml.safe_load(f)

context["source_path"] = os.path.dirname(config_fname)
context["ignore_io_errors"] = ignore_io_errors
context.update(kwargs)

preprocessors = (
Expand Down Expand Up @@ -349,7 +369,9 @@ def extend_base_template(content: str, base_template: str) -> str:


def main(
source_path: str, target_path: str, base_url: str, ignore_io_errors: bool
source_path: str,
target_path: str,
base_url: str,
) -> int:
"""
Copy every file in the source directory to the target directory.
Expand All @@ -363,7 +385,7 @@ def main(
os.makedirs(target_path, exist_ok=True)

sys.stderr.write("Generating context...\n")
context = get_context(config_fname, ignore_io_errors, base_url=base_url)
context = get_context(config_fname, base_url=base_url, target_path=target_path)
sys.stderr.write("Context generated\n")

templates_path = os.path.join(source_path, context["main"]["templates_path"])
Expand Down Expand Up @@ -407,15 +429,5 @@ def main(
parser.add_argument(
"--base-url", default="", help="base url where the website is served from"
)
parser.add_argument(
"--ignore-io-errors",
action="store_true",
help="do not fail if errors happen when fetching "
"data from http sources, and those fail "
"(mostly useful to allow github quota errors "
"when running the script locally)",
)
args = parser.parse_args()
sys.exit(
main(args.source_path, args.target_path, args.base_url, args.ignore_io_errors)
)
sys.exit(main(args.source_path, args.target_path, args.base_url))