From 66deeedbc41c986af5e11b82f2700c4019e5226e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 04:00:07 +0000 Subject: [PATCH 1/4] Initial plan From da783409bf2ee839c50a51d7a8e78747003a5641 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 04:03:08 +0000 Subject: [PATCH 2/4] Add link-checker agentic workflow Create automated workflow to check and fix broken links in documentation: - Runs daily on weekdays - Pre-step collects and tests all links using scripting - Uses cache memory to track unfixable broken links - Creates PRs with fixed URLs when needed - Uses web-fetch for finding alternative URLs Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/link-checker.lock.yml | 1125 +++++++++++++++++++++++ .github/workflows/link-checker.md | 230 +++++ 2 files changed, 1355 insertions(+) create mode 100644 .github/workflows/link-checker.lock.yml create mode 100644 .github/workflows/link-checker.md diff --git a/.github/workflows/link-checker.lock.yml b/.github/workflows/link-checker.lock.yml new file mode 100644 index 0000000..c7db87d --- /dev/null +++ b/.github/workflows/link-checker.lock.yml @@ -0,0 +1,1125 @@ +# +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw (v0.45.3). DO NOT EDIT. +# +# To update this file, edit the corresponding .md file and run: +# gh aw compile +# Not all edits will cause changes to this file. +# +# For more information: https://github.github.com/gh-aw/introduction/overview/ +# +# Daily automated link checker that finds and fixes broken links in documentation files +# +# frontmatter-hash: 8cac416a65962362c3e177ba41ce0519e814d5816e454bdee19ef2c3eda90081 + +name: "Daily Link Checker & Fixer" +"on": + schedule: + - cron: "51 18 * * 1-5" + # Friendly format: daily on weekdays (scattered) + workflow_dispatch: + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Daily Link Checker & Fixer" + +jobs: + activation: + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.45.3 + with: + destination: /opt/gh-aw/actions + - name: Checkout .github and .agents folders + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: | + .github + .agents + fetch-depth: 1 + persist-credentials: false + - name: Check workflow file timestamps + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_WORKFLOW_FILE: "link-checker.lock.yml" + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + - name: Create prompt with built-in context + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + bash /opt/gh-aw/actions/create_prompt_first.sh + cat << 'GH_AW_PROMPT_EOF' > "$GH_AW_PROMPT" + + GH_AW_PROMPT_EOF + cat "/opt/gh-aw/prompts/xpia.md" >> "$GH_AW_PROMPT" + cat "/opt/gh-aw/prompts/temp_folder_prompt.md" >> "$GH_AW_PROMPT" + cat "/opt/gh-aw/prompts/markdown.md" >> "$GH_AW_PROMPT" + cat "/opt/gh-aw/prompts/cache_memory_prompt.md" >> "$GH_AW_PROMPT" + cat << 'GH_AW_PROMPT_EOF' >> "$GH_AW_PROMPT" + + GitHub API Access Instructions + + The gh CLI is NOT authenticated. Do NOT use gh commands for GitHub operations. + + + To create or modify GitHub resources (issues, discussions, pull requests, etc.), you MUST call the appropriate safe output tool. Simply writing content will NOT work - the workflow requires actual tool calls. + + Temporary IDs: Some safe output tools support a temporary ID field (usually named temporary_id) so you can reference newly-created items elsewhere in the SAME agent output (for example, using #aw_abc1 in a later body). + + **IMPORTANT - temporary_id format rules:** + - If you DON'T need to reference the item later, OMIT the temporary_id field entirely (it will be auto-generated if needed) + - If you DO need cross-references/chaining, you MUST match this EXACT validation regex: /^aw_[A-Za-z0-9]{3,8}$/i + - Format: aw_ prefix followed by 3 to 8 alphanumeric characters (A-Z, a-z, 0-9, case-insensitive) + - Valid alphanumeric characters: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 + - INVALID examples: aw_ab (too short), aw_123456789 (too long), aw_test-id (contains hyphen), aw_id_123 (contains underscore) + - VALID examples: aw_abc, aw_abc1, aw_Test123, aw_A1B2C3D4, aw_12345678 + - To generate valid IDs: use 3-8 random alphanumeric characters or omit the field to let the system auto-generate + + Do NOT invent other aw_* formats — downstream steps will reject them with validation errors matching against /^aw_[A-Za-z0-9]{3,8}$/i. + + Discover available tools from the safeoutputs MCP server. + + **Critical**: Tool calls write structured data that downstream jobs process. Without tool calls, follow-up actions will be skipped. + + **Note**: If you made no other safe output tool calls during this workflow execution, call the "noop" tool to provide a status message indicating completion or that no actions were needed. + + + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' >> "$GH_AW_PROMPT" + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' >> "$GH_AW_PROMPT" + {{#runtime-import .github/workflows/link-checker.md}} + GH_AW_PROMPT_EOF + - name: Interpolate variables and render templates + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Substitute placeholders + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ALLOWED_EXTENSIONS: '' + GH_AW_CACHE_DESCRIPTION: '' + GH_AW_CACHE_DIR: '/tmp/gh-aw/cache-memory/' + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + + const substitutePlaceholders = require('/opt/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_ALLOWED_EXTENSIONS: process.env.GH_AW_ALLOWED_EXTENSIONS, + GH_AW_CACHE_DESCRIPTION: process.env.GH_AW_CACHE_DESCRIPTION, + GH_AW_CACHE_DIR: process.env.GH_AW_CACHE_DIR, + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + } + }); + - name: Validate prompt placeholders + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/validate_prompt_placeholders.sh + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/print_prompt_summary.sh + - name: Upload prompt artifact + if: success() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: prompt + path: /tmp/gh-aw/aw-prompts/prompt.txt + retention-days: 1 + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: read-all + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GH_AW_ASSETS_ALLOWED_EXTS: "" + GH_AW_ASSETS_BRANCH: "" + GH_AW_ASSETS_MAX_SIZE_KB: 0 + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + GH_AW_SAFE_OUTPUTS: /opt/gh-aw/safeoutputs/outputs.jsonl + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_WORKFLOW_ID_SANITIZED: linkchecker + outputs: + checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} + has_patch: ${{ steps.collect_output.outputs.has_patch }} + model: ${{ steps.generate_aw_info.outputs.model }} + output: ${{ steps.collect_output.outputs.output }} + output_types: ${{ steps.collect_output.outputs.output_types }} + secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.45.3 + with: + destination: /opt/gh-aw/actions + - name: Create gh-aw temp directory + run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh + - name: Checkout repository + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 + - id: link-check + name: Check and test all documentation links + run: "echo \"# Link Check Results\" > /tmp/link-check-results.md\necho \"\" >> /tmp/link-check-results.md\n\n# Find all markdown files in docs directory and README\necho \"Finding all markdown files...\"\nMARKDOWN_FILES=$(find docs README.md -type f -name \"*.md\" 2>/dev/null || echo \"\")\n\nif [ -z \"$MARKDOWN_FILES\" ]; then\n echo \"No markdown files found\"\n echo \"no_files=true\" >> $GITHUB_OUTPUT\n exit 0\nfi\n\n# Extract all links from markdown files\necho \"## Links Found\" >> /tmp/link-check-results.md\necho \"\" >> /tmp/link-check-results.md\n\n# Use grep to find markdown links and HTTP(S) URLs\nfor file in $MARKDOWN_FILES; do\n echo \"Checking $file...\"\n # Extract markdown links [text](url)\n grep -oP '\\[([^\\]]+)\\]\\(([^\\)]+)\\)' \"$file\" | grep -oP '\\(([^\\)]+)\\)' | tr -d '()' >> /tmp/all-links.txt 2>/dev/null || true\n # Extract plain HTTP(S) URLs\n grep -oP 'https?://[^\\s<>\"]+' \"$file\" >> /tmp/all-links.txt 2>/dev/null || true\ndone\n\n# Remove duplicates and sort\nif [ -f /tmp/all-links.txt ]; then\n sort -u /tmp/all-links.txt > /tmp/unique-links.txt\n LINK_COUNT=$(wc -l < /tmp/unique-links.txt)\n echo \"Found $LINK_COUNT unique links\" >> /tmp/link-check-results.md\n echo \"\" >> /tmp/link-check-results.md\nelse\n echo \"No links found\" >> /tmp/link-check-results.md\n echo \"no_links=true\" >> $GITHUB_OUTPUT\n exit 0\nfi\n\n# Test each link\necho \"## Link Test Results\" >> /tmp/link-check-results.md\necho \"\" >> /tmp/link-check-results.md\necho \"Testing links...\" >> /tmp/link-check-results.md\n\nBROKEN_COUNT=0\nWORKING_COUNT=0\n\nwhile IFS= read -r url; do\n # Skip relative links and anchors\n if [[ \"$url\" == \"#\"* ]] || [[ \"$url\" != \"http\"* ]]; then\n continue\n fi\n \n # Test the link with curl\n HTTP_CODE=$(curl -L -s -o /dev/null -w \"%{http_code}\" --max-time 10 \"$url\" 2>/dev/null || echo \"000\")\n \n if [[ \"$HTTP_CODE\" =~ ^2 ]] || [[ \"$HTTP_CODE\" =~ ^3 ]]; then\n WORKING_COUNT=$((WORKING_COUNT + 1))\n echo \"✅ $url (HTTP $HTTP_CODE)\" >> /tmp/link-check-results.md\n else\n BROKEN_COUNT=$((BROKEN_COUNT + 1))\n echo \"❌ $url (HTTP $HTTP_CODE)\" >> /tmp/link-check-results.md\n fi\ndone < /tmp/unique-links.txt\n\necho \"\" >> /tmp/link-check-results.md\necho \"**Summary:** $WORKING_COUNT working, $BROKEN_COUNT broken\" >> /tmp/link-check-results.md\n\n# Output results\necho \"broken_count=$BROKEN_COUNT\" >> $GITHUB_OUTPUT\necho \"working_count=$WORKING_COUNT\" >> $GITHUB_OUTPUT\n\ncat /tmp/link-check-results.md\n" + shell: bash + + # Cache memory file share configuration from frontmatter processed below + - name: Create cache-memory directory + run: bash /opt/gh-aw/actions/create_cache_memory_dir.sh + - name: Restore cache-memory file share data + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}-${{ github.run_id }} + path: /tmp/gh-aw/cache-memory + restore-keys: | + memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}- + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + id: checkout-pr + if: | + github.event.pull_request + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/checkout_pr_branch.cjs'); + await main(); + - name: Generate agentic run info + id: generate_aw_info + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const fs = require('fs'); + + const awInfo = { + engine_id: "copilot", + engine_name: "GitHub Copilot CLI", + model: process.env.GH_AW_MODEL_AGENT_COPILOT || "", + version: "", + agent_version: "0.0.410", + cli_version: "v0.45.3", + workflow_name: "Daily Link Checker & Fixer", + experimental: false, + supports_tools_allowlist: true, + run_id: context.runId, + run_number: context.runNumber, + run_attempt: process.env.GITHUB_RUN_ATTEMPT, + repository: context.repo.owner + '/' + context.repo.repo, + ref: context.ref, + sha: context.sha, + actor: context.actor, + event_name: context.eventName, + staged: false, + allowed_domains: ["node","python","github"], + firewall_enabled: true, + awf_version: "v0.19.1", + awmg_version: "v0.1.4", + steps: { + firewall: "squid" + }, + created_at: new Date().toISOString() + }; + + // Write to /tmp/gh-aw directory to avoid inclusion in PR + const tmpPath = '/tmp/gh-aw/aw_info.json'; + fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2)); + console.log('Generated aw_info.json at:', tmpPath); + console.log(JSON.stringify(awInfo, null, 2)); + + // Set model as output for reuse in other steps/jobs + core.setOutput('model', awInfo.model); + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.410 + - name: Install awf binary + run: bash /opt/gh-aw/actions/install_awf_binary.sh v0.19.1 + - name: Determine automatic lockdown mode for GitHub MCP Server + id: determine-automatic-lockdown + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + with: + script: | + const determineAutomaticLockdown = require('/opt/gh-aw/actions/determine_automatic_lockdown.cjs'); + await determineAutomaticLockdown(github, context, core); + - name: Download container images + run: bash /opt/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.19.1 ghcr.io/github/gh-aw-firewall/squid:0.19.1 ghcr.io/github/gh-aw-mcpg:v0.1.4 ghcr.io/github/github-mcp-server:v0.30.3 node:lts-alpine + - name: Write Safe Outputs Config + run: | + mkdir -p /opt/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs + cat > /opt/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' + {"create_pull_request":{},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + GH_AW_SAFE_OUTPUTS_CONFIG_EOF + cat > /opt/gh-aw/safeoutputs/tools.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_EOF' + [ + { + "description": "Create a new GitHub pull request to propose code changes. Use this after making file edits to submit them for review and merging. The PR will be created from the current branch with your committed changes. For code review comments on an existing PR, use create_pull_request_review_comment instead. CONSTRAINTS: Maximum 1 pull request(s) can be created. Title will be prefixed with \"[link-checker] \". Labels [documentation automated] will be automatically added.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "body": { + "description": "Detailed PR description in Markdown. Include what changes were made, why, testing notes, and any breaking changes. Do NOT repeat the title as a heading.", + "type": "string" + }, + "branch": { + "description": "Source branch name containing the changes. If omitted, uses the current working branch.", + "type": "string" + }, + "labels": { + "description": "Labels to categorize the PR (e.g., 'enhancement', 'bugfix'). Labels must exist in the repository.", + "items": { + "type": "string" + }, + "type": "array" + }, + "title": { + "description": "Concise PR title describing the changes. Follow repository conventions (e.g., conventional commits). The title appears as the main heading.", + "type": "string" + } + }, + "required": [ + "title", + "body" + ], + "type": "object" + }, + "name": "create_pull_request" + }, + { + "description": "Report that a tool or capability needed to complete the task is not available, or share any information you deem important about missing functionality or limitations. Use this when you cannot accomplish what was requested because the required functionality is missing or access is restricted.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "reason": { + "description": "Explanation of why this tool is needed or what information you want to share about the limitation (max 256 characters).", + "type": "string" + }, + "tool": { + "description": "Optional: Name or description of the missing tool or capability (max 128 characters). Be specific about what functionality is needed.", + "type": "string" + } + }, + "required": [ + "reason" + ], + "type": "object" + }, + "name": "missing_tool" + }, + { + "description": "Log a transparency message when no significant actions are needed. Use this to confirm workflow completion and provide visibility when analysis is complete but no changes or outputs are required (e.g., 'No issues found', 'All checks passed'). This ensures the workflow produces human-visible output even when no other actions are taken.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "message": { + "description": "Status or completion message to log. Should explain what was analyzed and the outcome (e.g., 'Code review complete - no issues found', 'Analysis complete - all tests passing').", + "type": "string" + } + }, + "required": [ + "message" + ], + "type": "object" + }, + "name": "noop" + }, + { + "description": "Report that data or information needed to complete the task is not available. Use this when you cannot accomplish what was requested because required data, context, or information is missing.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "context": { + "description": "Additional context about the missing data or where it should come from (max 256 characters).", + "type": "string" + }, + "data_type": { + "description": "Type or description of the missing data or information (max 128 characters). Be specific about what data is needed.", + "type": "string" + }, + "reason": { + "description": "Explanation of why this data is needed to complete the task (max 256 characters).", + "type": "string" + } + }, + "required": [], + "type": "object" + }, + "name": "missing_data" + } + ] + GH_AW_SAFE_OUTPUTS_TOOLS_EOF + cat > /opt/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' + { + "create_pull_request": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "branch": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "labels": { + "type": "array", + "itemType": "string", + "itemSanitize": true, + "itemMaxLength": 128 + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "missing_tool": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 512 + }, + "reason": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "tool": { + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "noop": { + "defaultMax": 1, + "fields": { + "message": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + } + } + } + } + GH_AW_SAFE_OUTPUTS_VALIDATION_EOF + - name: Generate Safe Outputs MCP Server Config + id: safe-outputs-config + run: | + # Generate a secure random API key (360 bits of entropy, 40+ chars) + # Mask immediately to prevent timing vulnerabilities + API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${API_KEY}" + + PORT=3001 + + # Set outputs for next steps + { + echo "safe_outputs_api_key=${API_KEY}" + echo "safe_outputs_port=${PORT}" + } >> "$GITHUB_OUTPUT" + + echo "Safe Outputs MCP server will run on port ${PORT}" + + - name: Start Safe Outputs MCP HTTP Server + id: safe-outputs-start + env: + DEBUG: '*' + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + run: | + # Environment variables are set above to prevent template injection + export DEBUG + export GH_AW_SAFE_OUTPUTS_PORT + export GH_AW_SAFE_OUTPUTS_API_KEY + export GH_AW_SAFE_OUTPUTS_TOOLS_PATH + export GH_AW_SAFE_OUTPUTS_CONFIG_PATH + export GH_AW_MCP_LOG_DIR + + bash /opt/gh-aw/actions/start_safe_outputs_server.sh + + - name: Start MCP Gateway + id: start-mcp-gateway + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} + GITHUB_MCP_LOCKDOWN: ${{ steps.determine-automatic-lockdown.outputs.lockdown == 'true' && '1' || '0' }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + set -eo pipefail + mkdir -p /tmp/gh-aw/mcp-config + + # Export gateway environment variables for MCP config and gateway script + export MCP_GATEWAY_PORT="80" + export MCP_GATEWAY_DOMAIN="host.docker.internal" + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${MCP_GATEWAY_API_KEY}" + export MCP_GATEWAY_API_KEY + export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" + mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" + export DEBUG="*" + + export GH_AW_ENGINE="copilot" + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_LOCKDOWN -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.4' + + mkdir -p /home/runner/.copilot + cat << GH_AW_MCP_CONFIG_EOF | bash /opt/gh-aw/actions/start_mcp_gateway.sh + { + "mcpServers": { + "github": { + "type": "stdio", + "container": "ghcr.io/github/github-mcp-server:v0.30.3", + "env": { + "GITHUB_LOCKDOWN_MODE": "$GITHUB_MCP_LOCKDOWN", + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", + "GITHUB_READ_ONLY": "1", + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests" + } + }, + "safeoutputs": { + "type": "http", + "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", + "headers": { + "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" + } + } + }, + "gateway": { + "port": $MCP_GATEWAY_PORT, + "domain": "${MCP_GATEWAY_DOMAIN}", + "apiKey": "${MCP_GATEWAY_API_KEY}", + "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" + } + } + GH_AW_MCP_CONFIG_EOF + - name: Generate workflow overview + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { generateWorkflowOverview } = require('/opt/gh-aw/actions/generate_workflow_overview.cjs'); + await generateWorkflowOverview(core); + - name: Download prompt artifact + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: prompt + path: /tmp/gh-aw/aw-prompts + - name: Clean git credentials + run: bash /opt/gh-aw/actions/clean_git_credentials.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + timeout-minutes: 60 + run: | + set -o pipefail + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --allow-domains '*.githubusercontent.com,*.jsr.io,*.pythonhosted.org,anaconda.org,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,binstar.org,bootstrap.pypa.io,bun.sh,codeload.github.com,conda.anaconda.org,conda.binstar.org,deb.nodesource.com,deno.land,files.pythonhosted.org,get.pnpm.io,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,jsr.io,lfs.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,objects.githubusercontent.com,pip.pypa.io,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registry.npmjs.org,registry.yarnpkg.com,repo.anaconda.com,repo.continuum.io,repo.yarnpkg.com,skimdb.npmjs.com,telemetry.enterprise.githubcopilot.com,www.npmjs.com,www.npmjs.org,yarnpkg.com' --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.19.1 --skip-pull \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --add-dir /tmp/gh-aw/cache-memory/ --allow-all-paths --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_AGENT_COPILOT:+ --model "$GH_AW_MODEL_AGENT_COPILOT"}' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_MODEL_AGENT_COPILOT: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Copy Copilot session state files to logs + if: always() + continue-on-error: true + run: | + # Copy Copilot session state files to logs folder for artifact collection + # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them + SESSION_STATE_DIR="$HOME/.copilot/session-state" + LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" + + if [ -d "$SESSION_STATE_DIR" ]; then + echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" + mkdir -p "$LOGS_DIR" + cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true + echo "Session state files copied successfully" + else + echo "No session-state directory found at $SESSION_STATE_DIR" + fi + - name: Stop MCP Gateway + if: always() + continue-on-error: true + env: + MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} + MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} + GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} + run: | + bash /opt/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" + - name: Redact secrets in logs + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Upload Safe Outputs + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: safe-output + path: ${{ env.GH_AW_SAFE_OUTPUTS }} + if-no-files-found: warn + - name: Ingest agent output + id: collect_output + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_ALLOWED_DOMAINS: "*.githubusercontent.com,*.jsr.io,*.pythonhosted.org,anaconda.org,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,binstar.org,bootstrap.pypa.io,bun.sh,codeload.github.com,conda.anaconda.org,conda.binstar.org,deb.nodesource.com,deno.land,files.pythonhosted.org,get.pnpm.io,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,jsr.io,lfs.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,objects.githubusercontent.com,pip.pypa.io,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registry.npmjs.org,registry.yarnpkg.com,repo.anaconda.com,repo.continuum.io,repo.yarnpkg.com,skimdb.npmjs.com,telemetry.enterprise.githubcopilot.com,www.npmjs.com,www.npmjs.org,yarnpkg.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/collect_ndjson_output.cjs'); + await main(); + - name: Upload sanitized agent output + if: always() && env.GH_AW_AGENT_OUTPUT + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: agent-output + path: ${{ env.GH_AW_AGENT_OUTPUT }} + if-no-files-found: warn + - name: Upload engine output files + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: agent_outputs + path: | + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + if-no-files-found: ignore + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse MCP Gateway logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_mcp_gateway_log.cjs'); + await main(); + - name: Print firewall logs + if: always() + continue-on-error: true + env: + AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs + run: | + # Fix permissions on firewall logs so they can be uploaded as artifacts + # AWF runs with sudo, creating files owned by root + sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true + # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step) + if command -v awf &> /dev/null; then + awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'AWF binary not installed, skipping firewall log summary' + fi + - name: Upload cache-memory data as artifact + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + if: always() + with: + name: cache-memory + path: /tmp/gh-aw/cache-memory + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: agent-artifacts + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/sandbox/firewall/logs/ + /tmp/gh-aw/agent-stdio.log + /tmp/gh-aw/agent/ + /tmp/gh-aw/aw.patch + if-no-files-found: ignore + + conclusion: + needs: + - activation + - agent + - detection + - safe_outputs + - update_cache_memory + if: (always()) && (needs.agent.result != 'skipped') + runs-on: ubuntu-slim + permissions: + contents: write + issues: write + pull-requests: write + outputs: + noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.45.3 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: 1 + GH_AW_WORKFLOW_NAME: "Daily Link Checker & Fixer" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Daily Link Checker & Fixer" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + id: handle_agent_failure + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Daily Link Checker & Fixer" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "link-checker" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.agent.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Daily Link Checker & Fixer" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "true" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_noop_message.cjs'); + await main(); + - name: Handle Create Pull Request Error + id: handle_create_pr_error + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Daily Link Checker & Fixer" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_create_pr_error.cjs'); + await main(); + + detection: + needs: agent + if: needs.agent.outputs.output_types != '' || needs.agent.outputs.has_patch == 'true' + runs-on: ubuntu-latest + permissions: {} + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + timeout-minutes: 10 + outputs: + success: ${{ steps.parse_results.outputs.success }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.45.3 + with: + destination: /opt/gh-aw/actions + - name: Download agent artifacts + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-artifacts + path: /tmp/gh-aw/threat-detection/ + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-output + path: /tmp/gh-aw/threat-detection/ + - name: Echo agent output types + env: + AGENT_OUTPUT_TYPES: ${{ needs.agent.outputs.output_types }} + run: | + echo "Agent output-types: $AGENT_OUTPUT_TYPES" + - name: Setup threat detection + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + WORKFLOW_NAME: "Daily Link Checker & Fixer" + WORKFLOW_DESCRIPTION: "Daily automated link checker that finds and fixes broken links in documentation files" + HAS_PATCH: ${{ needs.agent.outputs.has_patch }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/setup_threat_detection.cjs'); + await main(); + - name: Ensure threat-detection directory and log + run: | + mkdir -p /tmp/gh-aw/threat-detection + touch /tmp/gh-aw/threat-detection/detection.log + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.410 + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool shell(cat) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(tail) + # --allow-tool shell(wc) + timeout-minutes: 20 + run: | + set -o pipefail + COPILOT_CLI_INSTRUCTION="$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" + mkdir -p /tmp/ + mkdir -p /tmp/gh-aw/ + mkdir -p /tmp/gh-aw/agent/ + mkdir -p /tmp/gh-aw/sandbox/agent/logs/ + copilot --add-dir /tmp/ --add-dir /tmp/gh-aw/ --add-dir /tmp/gh-aw/agent/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-tool 'shell(cat)' --allow-tool 'shell(grep)' --allow-tool 'shell(head)' --allow-tool 'shell(jq)' --allow-tool 'shell(ls)' --allow-tool 'shell(tail)' --allow-tool 'shell(wc)' --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$COPILOT_CLI_INSTRUCTION"${GH_AW_MODEL_DETECTION_COPILOT:+ --model "$GH_AW_MODEL_DETECTION_COPILOT"} 2>&1 | tee /tmp/gh-aw/threat-detection/detection.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MODEL_DETECTION_COPILOT: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Parse threat detection results + id: parse_results + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_threat_detection_results.cjs'); + await main(); + - name: Upload threat detection log + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: threat-detection.log + path: /tmp/gh-aw/threat-detection/detection.log + if-no-files-found: ignore + + safe_outputs: + needs: + - activation + - agent + - detection + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (needs.detection.outputs.success == 'true') + runs-on: ubuntu-slim + permissions: + contents: write + issues: write + pull-requests: write + timeout-minutes: 15 + env: + GH_AW_ENGINE_ID: "copilot" + GH_AW_WORKFLOW_ID: "link-checker" + GH_AW_WORKFLOW_NAME: "Daily Link Checker & Fixer" + outputs: + create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} + create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} + process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} + process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.45.3 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Download patch artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-artifacts + path: /tmp/gh-aw/ + - name: Checkout repository + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (contains(needs.agent.outputs.output_types, 'create_pull_request')) + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + token: ${{ github.token }} + persist-credentials: false + fetch-depth: 1 + - name: Configure Git credentials + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (contains(needs.agent.outputs.output_types, 'create_pull_request')) + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + GIT_TOKEN: ${{ github.token }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${GIT_TOKEN}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Process Safe Outputs + id: process_safe_outputs + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_pull_request\":{\"base_branch\":\"${{ github.ref_name }}\",\"draft\":false,\"if_no_changes\":\"warn\",\"labels\":[\"documentation\",\"automated\"],\"max\":1,\"max_patch_size\":1024,\"title_prefix\":\"[link-checker] \"},\"missing_data\":{},\"missing_tool\":{}}" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/safe_output_handler_manager.cjs'); + await main(); + + update_cache_memory: + needs: + - agent + - detection + if: always() && needs.detection.outputs.success == 'true' + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.45.3 + with: + destination: /opt/gh-aw/actions + - name: Download cache-memory artifact (default) + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + continue-on-error: true + with: + name: cache-memory + path: /tmp/gh-aw/cache-memory + - name: Save cache-memory to cache (default) + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}-${{ github.run_id }} + path: /tmp/gh-aw/cache-memory + diff --git a/.github/workflows/link-checker.md b/.github/workflows/link-checker.md new file mode 100644 index 0000000..b371613 --- /dev/null +++ b/.github/workflows/link-checker.md @@ -0,0 +1,230 @@ +--- +description: Daily automated link checker that finds and fixes broken links in documentation files +on: + schedule: daily on weekdays +permissions: read-all +timeout-minutes: 60 +network: + allowed: + - node + - python + - github +steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check and test all documentation links + id: link-check + run: | + echo "# Link Check Results" > /tmp/link-check-results.md + echo "" >> /tmp/link-check-results.md + + # Find all markdown files in docs directory and README + echo "Finding all markdown files..." + MARKDOWN_FILES=$(find docs README.md -type f -name "*.md" 2>/dev/null || echo "") + + if [ -z "$MARKDOWN_FILES" ]; then + echo "No markdown files found" + echo "no_files=true" >> $GITHUB_OUTPUT + exit 0 + fi + + # Extract all links from markdown files + echo "## Links Found" >> /tmp/link-check-results.md + echo "" >> /tmp/link-check-results.md + + # Use grep to find markdown links and HTTP(S) URLs + for file in $MARKDOWN_FILES; do + echo "Checking $file..." + # Extract markdown links [text](url) + grep -oP '\[([^\]]+)\]\(([^\)]+)\)' "$file" | grep -oP '\(([^\)]+)\)' | tr -d '()' >> /tmp/all-links.txt 2>/dev/null || true + # Extract plain HTTP(S) URLs + grep -oP 'https?://[^\s<>"]+' "$file" >> /tmp/all-links.txt 2>/dev/null || true + done + + # Remove duplicates and sort + if [ -f /tmp/all-links.txt ]; then + sort -u /tmp/all-links.txt > /tmp/unique-links.txt + LINK_COUNT=$(wc -l < /tmp/unique-links.txt) + echo "Found $LINK_COUNT unique links" >> /tmp/link-check-results.md + echo "" >> /tmp/link-check-results.md + else + echo "No links found" >> /tmp/link-check-results.md + echo "no_links=true" >> $GITHUB_OUTPUT + exit 0 + fi + + # Test each link + echo "## Link Test Results" >> /tmp/link-check-results.md + echo "" >> /tmp/link-check-results.md + echo "Testing links..." >> /tmp/link-check-results.md + + BROKEN_COUNT=0 + WORKING_COUNT=0 + + while IFS= read -r url; do + # Skip relative links and anchors + if [[ "$url" == "#"* ]] || [[ "$url" != "http"* ]]; then + continue + fi + + # Test the link with curl + HTTP_CODE=$(curl -L -s -o /dev/null -w "%{http_code}" --max-time 10 "$url" 2>/dev/null || echo "000") + + if [[ "$HTTP_CODE" =~ ^2 ]] || [[ "$HTTP_CODE" =~ ^3 ]]; then + WORKING_COUNT=$((WORKING_COUNT + 1)) + echo "✅ $url (HTTP $HTTP_CODE)" >> /tmp/link-check-results.md + else + BROKEN_COUNT=$((BROKEN_COUNT + 1)) + echo "❌ $url (HTTP $HTTP_CODE)" >> /tmp/link-check-results.md + fi + done < /tmp/unique-links.txt + + echo "" >> /tmp/link-check-results.md + echo "**Summary:** $WORKING_COUNT working, $BROKEN_COUNT broken" >> /tmp/link-check-results.md + + # Output results + echo "broken_count=$BROKEN_COUNT" >> $GITHUB_OUTPUT + echo "working_count=$WORKING_COUNT" >> $GITHUB_OUTPUT + + cat /tmp/link-check-results.md + shell: bash + +tools: + github: + toolsets: [default] + cache-memory: true + web-fetch: + +safe-outputs: + create-pull-request: + title-prefix: "[link-checker] " + labels: [documentation, automated] + draft: false + if-no-changes: "warn" + noop: +--- + +# Daily Link Checker & Fixer + +You are an automated link checker and fixer agent. Your job is to find and fix broken links in the documentation files of this repository. + +## Your Mission + +Your workflow has already collected and tested all links in the previous step. Use the test results to identify broken links and fix them where possible. + +## Step 1: Review Link Check Results + +The link check step has already run and created a report at `/tmp/link-check-results.md`. Read this file to see: +- All links found in the documentation +- Which links are working (✅) and which are broken (❌) +- HTTP status codes for each link + +Use bash to read the file: +```bash +cat /tmp/link-check-results.md +``` + +## Step 2: Load Cache Memory + +Check cache memory for previously identified unfixable broken links: +- Load the cache memory to see if there are any broken links we've tried to fix before but couldn't +- These are links that are permanently broken or removed from the internet +- Skip these links to avoid repeated attempts + +The cache memory should store a JSON object with this structure: +```json +{ + "unfixable_links": [ + { + "url": "https://example.com/removed-page", + "reason": "404 Not Found - content removed", + "first_seen": "2026-02-17" + } + ], + "last_run": "2026-02-17" +} +``` + +## Step 3: Research and Fix Broken Links + +For each broken link found in the test results (but NOT in the unfixable list): + +1. **Investigate the link:** + - Determine what the link was supposed to point to based on: + - The link text in the markdown + - The context around the link + - The surrounding documentation + +2. **Search for alternatives:** + - Use web-fetch to search for if the content has moved to a new URL + - Try common alternatives (www vs non-www, http vs https, with/without trailing slash) + - Look for redirects or updated documentation + - Check if there's an official replacement + +3. **Fix the link:** + - If you find a working replacement URL, use the `edit` tool to update the markdown file + - Replace the broken URL with the working one + - Make sure to preserve the link text and formatting + +4. **Document unfixable links:** + - If a link truly cannot be fixed (content permanently removed, no alternatives found): + - Add it to the unfixable_links list in cache memory + - Include the URL, reason, and date + - This prevents future runs from wasting time on the same broken link + +## Step 4: Update Cache Memory + +After processing all broken links: +- Update the cache memory with any new unfixable links +- Update the "last_run" timestamp +- Save the updated cache memory + +## Step 5: Create Pull Request or Noop + +Based on your work: + +**If you fixed any links:** +- Use the `create-pull-request` safe output to create a PR with your fixes +- In the PR body, include: + - A summary of how many links were fixed + - A list of the broken links and their replacements + - Any links that were added to the unfixable list +- Title format: "Fix broken documentation links" + +**If no links needed fixing:** +- Use the `noop` safe output with a clear message like: + - "All documentation links are working correctly" (if no broken links found) + - "All broken links are in the unfixable list, no new fixes available" (if broken links exist but can't be fixed) + +## Important Guidelines + +- **Be thorough:** Check every broken link carefully +- **Preserve context:** When replacing links, make sure the new URL points to equivalent or better content +- **Document everything:** Keep the cache memory up to date with unfixable links +- **Be selective:** Only add links to the unfixable list if you've genuinely tried to find alternatives +- **Use web-fetch wisely:** Try to fetch the broken URL and check for redirects or alternatives +- **Relative links:** Focus only on HTTP(S) links. Skip relative links and anchors (they're tested differently) + +## Example Cache Memory Update + +```json +{ + "unfixable_links": [ + { + "url": "https://old-docs.example.com/api/v1", + "reason": "Documentation site shut down, no replacement found despite searching", + "first_seen": "2026-02-17" + } + ], + "last_run": "2026-02-17" +} +``` + +## Context + +- Repository: `${{ github.repository }}` +- Run daily on weekdays to catch broken links early +- Link test results are available at `/tmp/link-check-results.md` From a212ca37335cf3135976ecd2aea75d2f665c999b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 04:04:05 +0000 Subject: [PATCH 3/4] Add documentation for link-checker workflow Create comprehensive documentation for the new link checker workflow and add it to the README Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- README.md | 1 + docs/link-checker.md | 128 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 docs/link-checker.md diff --git a/README.md b/README.md index f0a6a37..49fe043 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ You can use the "/plan" agent to turn the reports into actionable issues which c - [📖 Regular Documentation Update](docs/update-docs.md) - Update documentation automatically - [📖 Daily Documentation Updater](docs/daily-doc-updater.md) - Automatically update documentation based on recent code changes and merged PRs +- [🔗 Link Checker](docs/link-checker.md) - Daily automated link checker that finds and fixes broken links in documentation - [✨ Code Simplifier](docs/code-simplifier.md) - Automatically simplify recently modified code for improved clarity and maintainability - [⚡ Daily Progress](docs/daily-progress.md) - Automated daily feature development following a structured roadmap - [🧪 Daily Test Coverage Improver](docs/daily-test-improver.md) - Improve test coverage by adding meaningful tests to under-tested areas diff --git a/docs/link-checker.md b/docs/link-checker.md new file mode 100644 index 0000000..86306f2 --- /dev/null +++ b/docs/link-checker.md @@ -0,0 +1,128 @@ +# 🔗 Link Checker + +**Workflow file:** [`.github/workflows/link-checker.md`](../.github/workflows/link-checker.md) + +## What it does + +The Link Checker is an automated agentic workflow that: + +1. **Scans all documentation** for HTTP(S) links in markdown files +2. **Tests each link** to verify it's still working +3. **Fixes broken links** by finding replacements or alternatives +4. **Remembers unfixable links** using cache memory to avoid repeated attempts +5. **Creates pull requests** with the fixed links when changes are made + +## How it works + +### Pre-Processing Step (Scripting) + +Before the AI agent runs, a bash script: +- Finds all markdown files in the `docs/` directory and `README.md` +- Extracts all HTTP(S) links from markdown files +- Tests each link with `curl` to check HTTP status codes +- Generates a detailed report at `/tmp/link-check-results.md` + +### AI Agent Processing + +The AI agent then: +1. Reads the link check report to identify broken links +2. Loads cache memory to skip previously identified unfixable links +3. For each broken link: + - Investigates the link context to understand what it should point to + - Uses web-fetch to find if the content moved or has alternatives + - Tries common variations (www vs non-www, http vs https, etc.) + - Updates the markdown file with the working replacement URL +4. Updates cache memory with any new unfixable links +5. Creates a pull request with all fixes, or uses `noop` if nothing needs fixing + +### Cache Memory + +The workflow maintains a persistent cache of unfixable broken links: +```json +{ + "unfixable_links": [ + { + "url": "https://example.com/removed", + "reason": "404 Not Found - content permanently removed", + "first_seen": "2026-02-17" + } + ], + "last_run": "2026-02-17" +} +``` + +This prevents the workflow from repeatedly attempting to fix links that are permanently broken. + +## When it runs + +- **Daily on weekdays** (automatic fuzzy scheduling) +- **Manually** via workflow_dispatch (automatically enabled for fuzzy schedules) + +## Safe Outputs + +- **create-pull-request**: Creates PRs with fixed links, labeled with `documentation` and `automated` +- **noop**: Reports when all links are working or all broken links are unfixable + +## Tools Used + +- **github**: GitHub API access for repository operations +- **cache-memory**: Persistent storage for tracking unfixable links +- **web-fetch**: For checking if broken URLs have moved or have alternatives +- **bash**: For scripting the initial link extraction and testing +- **edit**: For updating markdown files with fixed links + +## Network Access + +The workflow has access to: +- `node` ecosystem (npm packages, Node.js tools) +- `python` ecosystem (pip packages, Python tools) +- `github` domain (GitHub API) + +## Example Output + +When the workflow finds and fixes broken links, it creates a PR with: + +**Title:** `[link-checker] Fix broken documentation links` + +**Body:** +```markdown +## Summary + +Fixed 3 broken links in the documentation. + +## Changes + +- docs/example.md: `https://old-site.com/api` → `https://new-site.com/api/v2` +- README.md: `https://deprecated.com/docs` → `https://current.com/documentation` +- docs/guide.md: `https://archived.org/page` → `https://web.archive.org/web/.../page` + +## Unfixable Links + +Added 1 link to the unfixable list: +- `https://shutdown-site.com` - 404 Not Found, site permanently shut down +``` + +## Benefits + +1. **Automated maintenance**: Keeps documentation links up to date without manual checking +2. **Smart fixing**: Uses AI to find appropriate replacements based on context +3. **Efficient**: Only processes new broken links, skips known unfixable ones +4. **Daily monitoring**: Catches broken links early before they cause issues +5. **Transparent**: Creates reviewable PRs instead of direct commits + +## Configuration + +The workflow is configured to: +- Run on weekdays only to avoid Monday backlog +- Use a 60-minute timeout for processing large documentation sets +- Create draft PRs for review before merging +- Warn if no changes are needed (via `if-no-changes: "warn"`) + +## Future Enhancements + +Possible improvements: +- Add support for checking relative links within the repository +- Verify anchor links (e.g., `#section-name`) +- Check for broken images +- Integrate with web archive services for finding archived versions +- Generate metrics on link health over time From ada13a7fb4b6e8c97d5579ece4dfce664633e52b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Feb 2026 04:04:57 +0000 Subject: [PATCH 4/4] Fix documentation inconsistency in link-checker Update docs to correctly state that PRs are created as ready-to-review (non-draft) Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- docs/link-checker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/link-checker.md b/docs/link-checker.md index 86306f2..98fc460 100644 --- a/docs/link-checker.md +++ b/docs/link-checker.md @@ -115,7 +115,7 @@ Added 1 link to the unfixable list: The workflow is configured to: - Run on weekdays only to avoid Monday backlog - Use a 60-minute timeout for processing large documentation sets -- Create draft PRs for review before merging +- Create ready-to-review PRs (non-draft) for quick merging - Warn if no changes are needed (via `if-no-changes: "warn"`) ## Future Enhancements