diff --git a/wpull/scraper/html.py b/wpull/scraper/html.py index a1672515..efff9a04 100644 --- a/wpull/scraper/html.py +++ b/wpull/scraper/html.py @@ -264,6 +264,7 @@ class ElementWalker(object): LINK_ATTRIBUTES = frozenset([ 'action', 'archive', 'background', 'cite', 'classid', 'codebase', 'data', 'href', 'longdesc', 'profile', 'src', + 'poster', 'usemap', 'dynsrc', 'lowsrc', ]) @@ -293,6 +294,9 @@ class ElementWalker(object): 'table': {'background': ATTR_INLINE}, 'td': {'background': ATTR_INLINE}, 'th': {'background': ATTR_INLINE}, + 'video': {'src': ATTR_INLINE, 'poster': ATTR_INLINE}, + 'audio': {'src': ATTR_INLINE, 'poster': ATTR_INLINE}, + 'source': {'src': ATTR_INLINE}, } '''Mapping of element tag names to attributes containing links.''' DYNAMIC_ATTRIBUTES = ('onkey', 'oncli', 'onmou') diff --git a/wpull/scraper/html_test.py b/wpull/scraper/html_test.py index 175a8349..1e5c4f21 100644 --- a/wpull/scraper/html_test.py +++ b/wpull/scraper/html_test.py @@ -65,7 +65,12 @@ def test_html_scraper_links(self): 'http://example.com/applet/applet_src.class', 'http://example.com/bgsound.mid', 'http://example.com/audio_src.wav', + 'http://example.com/audio_poster.jpeg', 'http://example.net/source_src.wav', + 'http://example.com/video_src.webm', + 'http://example.com/video_poster.jpeg', + 'http://example.net/track_src.vtt', + 'http://example.net/source_src.webm', 'http://example.com/embed_src.mov', 'http://example.com/fig_src.png', 'http://example.com/frame_src.html', diff --git a/wpull/testing/samples/many_urls.html b/wpull/testing/samples/many_urls.html index 08a9a624..b77e287a 100644 --- a/wpull/testing/samples/many_urls.html +++ b/wpull/testing/samples/many_urls.html @@ -41,9 +41,13 @@ -