Coverage for src/archive_md_urls/update_files.py: 55%
20 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-28 16:50 +0200
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-28 16:50 +0200
1"""Turn URLs in Markdown files to Wayback snapshots."""
3import re
4from pathlib import Path
5from typing import Optional
7from archive_md_urls.gather_snapshots import gather_snapshots
8from archive_md_urls.scan_md import scan_md
async def update_files(files: list[Path]) -> None:
    """Scan and update URLs in Markdown files.

    File contents are updated in-place. A file is only written back when its
    content actually changed, so files without any replaced URL are left
    untouched on disk.

    Prints a one-line summary of the number of URLs for which a snapshot was
    found and the number of files processed.

    Args:
        files (list[Path]): List of Markdown files to scan and update
    """
    # Keep count of changed URLs to summarize changes to user
    changed_urls: int = 0
    for file in files:
        md_source: str = file.read_text(encoding="utf-8")
        date, urls = scan_md(md_source, file)
        # Call API and collect snapshots
        wayback_urls: dict[str, Optional[str]] = await gather_snapshots(urls, date)
        # Update links in file source
        updated_md_source: str = update_md_source(md_source, wayback_urls)
        # Text-mode read/write is not a byte-exact round trip (line endings
        # may be translated), so avoid rewriting files that did not change
        if updated_md_source != md_source:
            file.write_text(updated_md_source, encoding="utf-8")
        # Count every URL for which a snapshot was returned
        changed_urls += sum(1 for snapshot in wayback_urls.values() if snapshot)
    print(f"Changed {changed_urls} {'URL' if changed_urls == 1 else 'URLs'} "
          f"in {len(files)} {'file' if len(files) == 1 else 'files'}.")
def update_md_source(md_source: str, wayback_urls: dict[str, Optional[str]]) -> str:
    """Replace URLs in Markdown file with Wayback Snapshots.

    A URL is only replaced where it appears directly between an opening and a
    closing parenthesis, i.e. as ``(url)``. The URL is matched literally:
    characters such as ``?``, ``.`` or ``+`` have no special regex meaning.
    The snapshot is inserted verbatim as well.

    Args:
        md_source (str): Content of Markdown file that should be updated
        wayback_urls (dict[str, Optional[str]]): URL-Snapshot pairs

    Returns:
        str: Content of Markdown file with updated URLs
    """
    for url, snapshot in wayback_urls.items():
        # Skip cases where no Wayback Snapshot was found
        if not snapshot:
            continue
        # Only replace strings which are == url if they are preceded and
        # followed by braces to avoid mismatches. The URL must be escaped,
        # otherwise e.g. the "?" of a query string acts as a regex quantifier
        # and the URL never matches itself.
        pattern = rf"(?<=\(){re.escape(url)}(?=\))"
        # A callable replacement inserts the snapshot literally; a plain
        # string would have backslash sequences interpreted by re.sub.
        md_source = re.sub(pattern, lambda _match, repl=snapshot: repl, md_source)
    return md_source