#!/usr/bin/env python #*********************************************************** # # parse-link-check.py # # Arguments: # 1. working directory: path to directory containing output # of sphinx-build linkcheck (output.txt). Output of this # script (broken_links.html) will also be saved in this # location. # # External file dependencies: # 1. output.txt - the output of sphinx-build # 2. link-whitelist.txt - broken links that should be ignored. # This file should be in the same location as this script. # # Output: # 1. broken_links.html - provides count of broken and whitelist # matches. Also provides links to all flagged links. Will # appear in the same directory as output.txt # 2. Error code 255 if unexpected broken links are found # # Purpose: # This script supplements the built-in link checking of # sphinx-build. In practice, sphinx-build link checking # will produce a variety of false negatives. This script, # using the white list, will skip known false negatives and # anchors, providing an HTML digest of suspected actual # broken links if there are any. # #*********************************************************** import sys import re import os fileName = "output.txt" outFile = "broken_links.html" whitelistFile = "link-whitelist.txt" if len(sys.argv) < 2: print ("Enter path of input directory") sys.exit() scriptPath = sys.argv[0] outputPath = sys.argv[1] fileNamePath = outputPath + "/" + fileName outFilePath = outputPath + "/" + outFile whitelistFilePath = os.path.dirname(scriptPath) + "/" + whitelistFile with open (whitelistFilePath) as w: whLines = w.readlines() whitelist = [] for line in whLines: link = line.rstrip() whitelist.append(link) with open (fileNamePath) as f: lines = f.readlines() numBrokenLinks = 0 numWhiteListMatches = 0 numAnchors = 0 newLines = ["
"] whiteListLines = [] anchorLines = [] for line in lines: if "[broken]" in line: strings = line.split(" ") link = strings[2][:-1] link = link.strip() if link in whitelist: whiteListLines.append("" + strings[0] + "\n[whitelist] " + link + "\n") numWhiteListMatches += 1 print("[White list match] " + link) elif "Anchor '" in line: anchorLines.append("" + strings[0] + "\n
[anchor] " + link + "\n") numAnchors += 1 print("[Anchor not found] " + link) else: newLines.append("" + strings[0] + "\n
[broken] " + link + "\n") numBrokenLinks += 1 print("[broken link] " + link) newLines.insert(0,"