Coverage for tests/test_scan_md.py: 100%
43 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-28 16:50 +0200
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-28 16:50 +0200
1import unittest
2from typing import Any
3from unittest import mock
5from archive_md_urls import scan_md
6from tests.testfiles import (TEST_MD1, TEST_MD1_SOURCE, TEST_MD2,
7 TEST_MD2_SOURCE, TEST_MD3, TEST_MD3_SOURCE,
8 TEST_YAML_SOURCE)
class TestScanMD(unittest.TestCase):
    """Test functions in scan_md."""

    def test_filter_urls(self) -> None:
        """Test if URL lists are correctly filtered."""
        html, _ = scan_md.convert_markdown(TEST_MD1_SOURCE)
        urls: list[str] = scan_md.get_urls(html)
        # Filtered list should only contain example.com and both Github URLs
        correct_filtered_result: list[str] = [
            "example.com", "github.com", "https://github.com/pypa/pip"
        ]
        # assertCountEqual verifies both the length and the membership in one
        # order-insensitive comparison and prints the differing elements on
        # failure, instead of a bare "False is not true".
        self.assertCountEqual(scan_md.filter_urls(urls), correct_filtered_result)
        # Filtering STABLE_URLS should result in empty list
        self.assertEqual(scan_md.filter_urls(scan_md.STABLE_URLS), [])

    def test_format_date(self) -> None:
        """Test if date is correctly formatted or returned as None."""
        # Accepted inputs and expected results
        accepted: list[tuple[str, str]] = [
            ("2010-03-07", "201003070000"),
            ("2013-04-11 14:50", "201304111450"),
            ("2015-05-17 17:49:59", "201505171749"),
            ("20.08.2018", "201808200000"),
            # Hugo default
            ("2019-03-26T08:47:11+01:00", "201903260847"),
            # A human readable date string
            ("Monday, October 10, 2014", "201410100000"),
        ]
        for raw_date, expected in accepted:
            # subTest keeps going after a failure so every broken input is
            # reported in a single run.
            with self.subTest(raw_date=raw_date):
                self.assertEqual(scan_md.format_date(raw_date), expected)
        # Various inputs that should raise dateutil ParserError and thus return None
        for raw_date in ("some wrong string", "2014-04-10 13:10:99"):
            with self.subTest(raw_date=raw_date):
                self.assertIsNone(scan_md.format_date(raw_date))

    def test_convert_markdown(self) -> None:
        """Test if date correctly extracted from metadata."""
        _, date = scan_md.convert_markdown(TEST_MD1_SOURCE)
        self.assertEqual(date, "2014-04-28")
        _, date = scan_md.convert_markdown(TEST_YAML_SOURCE)
        self.assertEqual(date, "2014-04-28")
        _, date = scan_md.convert_markdown(TEST_MD2_SOURCE)
        self.assertIsNone(date)
        _, date = scan_md.convert_markdown(TEST_MD3_SOURCE)
        self.assertIsNone(date)

    def test_get_urls(self) -> None:
        """Test if URLs are correctly extracted from HTML."""
        html, _ = scan_md.convert_markdown(TEST_MD1_SOURCE)
        # All URLs contained in the test file
        full_url_list: list[str] = [
            "example.com", "example.com",
            "github.com", "https://github.com/pypa/pip",
            "https://web.archive.org/web/20000622042643/http://www.google.com/",
            "https://doi.org/10.1080/32498327493.2014.358732798",
            "{filename}/blog/2012/2012-02-05-an-even-older-blogpost.md"
        ]
        self.assertEqual(scan_md.get_urls(html), full_url_list)

    @mock.patch("archive_md_urls.scan_md.format_date")
    def test_scan_md(self, mock_format_date: Any) -> None:
        """Test if format_date is called with correct value.

        assert_called_with only inspects the most recent recorded call, so the
        mock is reset between scenarios. Without the reset, a call recorded by
        an earlier scenario could satisfy a later assertion even if scan_md
        never called format_date for that scenario (the last two scenarios
        expect the same argument).
        """
        # No date at all should result in calling format_date with trimmed string
        scan_md.scan_md(TEST_MD2_SOURCE, TEST_MD2)
        mock_format_date.assert_called_with("fake-blogp")
        # Test with date in metadata
        mock_format_date.reset_mock()
        scan_md.scan_md(TEST_MD1_SOURCE, TEST_MD1)
        mock_format_date.assert_called_with("2014-04-28")
        # Test with date in file name only
        mock_format_date.reset_mock()
        scan_md.scan_md(TEST_MD3_SOURCE, TEST_MD3)
        mock_format_date.assert_called_with("2014-04-28")