Check for broken links #739
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------------------
# GitHub Actions Workflow: Check for Broken Links on GitHub Pages Site
# -------------------------------------------------------------------------------------
# This file defines a GitHub Actions *workflow* — a full automation process.
# It includes triggers (when it runs), and jobs (what it does).
#
# Inside the job, we *use an action* — a reusable piece of automation that handles
# one task (in this case, link checking).
#
# Reminder: Workflows are made up of jobs, and jobs are made up of steps.
# Steps often *use actions*, which are prebuilt helper tools.
name: Check for broken links  # Workflow name (appears in GitHub's Actions UI)

# Triggers: any one of the events below starts the workflow.
on:
  # Manual trigger: lets the workflow be started from the GitHub UI.
  workflow_dispatch:

  # Run *after* the GitHub Pages site has been built and deployed.
  # The `pages-build-deployment` workflow is created automatically by GitHub
  # when Pages is enabled for the repository.
  workflow_run:
    workflows:
      - pages-build-deployment
    types:
      # Fires when that deployment run finishes, whatever its conclusion.
      - completed

  # Scheduled run. Cron fields: minute hour day-of-month month day-of-week.
  schedule:
    - cron: '49 11 * * 1'  # Every Monday at 11:49 UTC (6:49 AM CDT)
# -------------------------------------------------------------------------------------
# Job: A self-contained unit of work run on a specific virtual machine (runner).
# -------------------------------------------------------------------------------------
jobs:
  crawl_for_broken_links:
    # Operating system environment for this job. Required: GitHub needs to know
    # which kind of runner to use. `ubuntu-latest` is the most common choice and
    # ships with many pre-installed tools.
    runs-on: ubuntu-latest
    # Optional display name (helpful if the workflow has multiple jobs).
    name: Broken-Links-Crawler
    steps:
      - name: Checking Links
        # Prebuilt *action* from the GitHub Marketplace that crawls the site
        # and detects broken hyperlinks.
        #
        # Source: https://github.com/ScholliYT/Broken-Links-Crawler-Action
        #
        # NOTE(review): the version ref was garbled in the copy this file was
        # restored from. `v3` is the major-version tag used in the action's
        # README — confirm it against the version the repository pins.
        uses: ScholliYT/Broken-Links-Crawler-Action@v3
        with:
          # URL to start crawling from: the homepage of the GitHub Pages site.
          website_url: 'https://uw-madison-datascience.github.io/ML-X-Nexus/'
          verbose: true
          # Comma-separated URL prefixes to skip. Full URLs are listed to
          # exempt ("whitelist") individual pages from checking.
          #
          # This is a folded block scalar: quotation marks inside it are literal
          # characters and would become part of a prefix, so entries are left
          # unquoted. `>-` also strips the trailing newline after the last entry.
          exclude_url_prefix: >-
            mailto:,
            https://dl.acm.org/doi/10.1145/3065386,
            https://openai.com/research/,
            https://www.sciencedirect.com/science/article/abs/pii/S0168169922006603,
            https://www.kaggle.com/c/house-prices-advanced-regression-techniques,
            https://www.kaggle.com/c/cifar-10/,
            https://www.kaggle.com/competitions/infected-tomato-leaf-vein-segmentation,
            https://www.kaggle.com/c/nlp-getting-started,
            https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation,
            https://mediaspace.wisc.edu/,
            https://kb.wisc.edu/,
            https://vimeo.com/,
            https://player.vimeo.com/,
            https://it.wisc.edu/exploring-artificial-intelligence-uw-madison/,
            https://doi.org/10.1002/eap.2694,
            https://www.mdpi.com/2072-4292/11/11/1309
          # Skip links that end in `.qmd`.
          exclude_url_suffix: '.qmd'