diff --git a/wpull/document/css.py b/wpull/document/css.py index 2ee8d118..e81cf60b 100644 --- a/wpull/document/css.py +++ b/wpull/document/css.py @@ -12,7 +12,7 @@ class CSSReader(BaseDocumentDetector, BaseTextStreamReader): '''Cascading Stylesheet Document Reader.''' - URL_PATTERN = r'''url\(\s*(['"]?)(.{1,500}?)(?:\1)\s*\)''' + URL_PATTERN = r'''url\(\s*(['"]?)(.*?)(?:\1)\s*\)''' IMPORT_URL_PATTERN = r'''@import\s*(?:url\()?['"]?([^\s'")]{1,500}).*?;''' URL_REGEX = re.compile(r'{}|{}'.format(URL_PATTERN, IMPORT_URL_PATTERN)) BUFFER_SIZE = 1048576 diff --git a/wpull/scraper/html_test.py b/wpull/scraper/html_test.py index 175a8349..14556522 100644 --- a/wpull/scraper/html_test.py +++ b/wpull/scraper/html_test.py @@ -52,6 +52,7 @@ def test_html_scraper_links(self): 'http://example.com/style_import_quote_url.css', 'http://example.com/style_single_quote_import.css', 'http://example.com/style_double_quote_import.css', + 'http://example.com/bg.png', 'http://example.com/link_href.css', 'http://example.com/script.js', 'http://example.com/body_background.png', diff --git a/wpull/testing/samples/many_urls.html b/wpull/testing/samples/many_urls.html index 08a9a624..db050d0e 100644 --- a/wpull/testing/samples/many_urls.html +++ b/wpull/testing/samples/many_urls.html @@ -9,6 +9,9 @@ @import 'style_single_quote_import.css'; @import "style_double_quote_import.css"; +