Coverage for tests/test_scan_md.py: 100%

43 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-28 16:50 +0200

1import unittest 

2from typing import Any 

3from unittest import mock 

4 

5from archive_md_urls import scan_md 

6from tests.testfiles import (TEST_MD1, TEST_MD1_SOURCE, TEST_MD2, 

7 TEST_MD2_SOURCE, TEST_MD3, TEST_MD3_SOURCE, 

8 TEST_YAML_SOURCE) 

9 

10 

class TestScanMD(unittest.TestCase):
    """Test functions in scan_md."""

    def test_filter_urls(self) -> None:
        """Test if URL lists are correctly filtered."""
        html, _ = scan_md.convert_markdown(TEST_MD1_SOURCE)
        urls: list[str] = scan_md.get_urls(html)
        # Filtered list should only contain example.com and both Github URLs
        correct_filtered_result: list[str] = [
            "example.com",
            "github.com",
            "https://github.com/pypa/pip",
        ]
        # assertCountEqual checks length and membership in one go (order is
        # ignored) and prints the differing elements when the check fails.
        self.assertCountEqual(scan_md.filter_urls(urls), correct_filtered_result)
        # Filtering STABLE_URLS should result in empty list
        self.assertEqual(scan_md.filter_urls(scan_md.STABLE_URLS), [])

    def test_format_date(self) -> None:
        """Test if date is correctly formatted or returned as None."""
        # Accepted inputs and expected results
        accepted: list[tuple[str, str]] = [
            ("2010-03-07", "201003070000"),
            ("2013-04-11 14:50", "201304111450"),
            ("2015-05-17 17:49:59", "201505171749"),
            ("20.08.2018", "201808200000"),
            # Hugo default
            ("2019-03-26T08:47:11+01:00", "201903260847"),
            # A human readable date string
            ("Monday, October 10, 2014", "201410100000"),
        ]
        for raw, expected in accepted:
            # subTest keeps going after a failure, so every bad input is reported
            with self.subTest(raw=raw):
                self.assertEqual(scan_md.format_date(raw), expected)
        # Various inputs that should raise dateutil ParserError and thus return None
        rejected: list[str] = ["some wrong string", "2014-04-10 13:10:99"]
        for raw in rejected:
            with self.subTest(raw=raw):
                self.assertIsNone(scan_md.format_date(raw))

    def test_convert_markdown(self) -> None:
        """Test if date correctly extracted from metadata."""
        # Sources for which a date of 2014-04-28 is expected from the metadata
        with_date = {
            "TEST_MD1_SOURCE": TEST_MD1_SOURCE,
            "TEST_YAML_SOURCE": TEST_YAML_SOURCE,
        }
        for label, source in with_date.items():
            with self.subTest(source=label):
                _, date = scan_md.convert_markdown(source)
                self.assertEqual(date, "2014-04-28")
        # Sources for which convert_markdown is expected to report no date
        without_date = {
            "TEST_MD2_SOURCE": TEST_MD2_SOURCE,
            "TEST_MD3_SOURCE": TEST_MD3_SOURCE,
        }
        for label, source in without_date.items():
            with self.subTest(source=label):
                _, date = scan_md.convert_markdown(source)
                self.assertIsNone(date)

    def test_get_urls(self) -> None:
        """Test if URLs are correctly extracted from HTML."""
        html, _ = scan_md.convert_markdown(TEST_MD1_SOURCE)
        # All URLs contained in the test file
        full_url_list: list[str] = [
            "example.com", "example.com",
            "github.com", "https://github.com/pypa/pip",
            "https://web.archive.org/web/20000622042643/http://www.google.com/",
            "https://doi.org/10.1080/32498327493.2014.358732798",
            "{filename}/blog/2012/2012-02-05-an-even-older-blogpost.md",
        ]
        self.assertEqual(scan_md.get_urls(html), full_url_list)

    @mock.patch("archive_md_urls.scan_md.format_date")
    def test_scan_md(self, mock_format_date: Any) -> None:
        """Test if format_date is called with correct value.

        The mock is reset before every scenario. assert_called_with only looks
        at the most recent recorded call, so without the reset a scenario in
        which scan_md never reaches format_date could still pass on the call
        left over from the previous scenario (the last two scenarios expect
        the same argument).
        """
        # No date at all should result in calling format_date with trimmed string
        mock_format_date.reset_mock()
        scan_md.scan_md(TEST_MD2_SOURCE, TEST_MD2)
        mock_format_date.assert_called_with("fake-blogp")
        # Test with date in metadata
        mock_format_date.reset_mock()
        scan_md.scan_md(TEST_MD1_SOURCE, TEST_MD1)
        mock_format_date.assert_called_with("2014-04-28")
        # Test with date in file name only
        mock_format_date.reset_mock()
        scan_md.scan_md(TEST_MD3_SOURCE, TEST_MD3)
        mock_format_date.assert_called_with("2014-04-28")