diff --git a/app/settings/dev.py b/app/settings/dev.py index a48ccb1..e60e4df 100644 --- a/app/settings/dev.py +++ b/app/settings/dev.py @@ -20,7 +20,7 @@ # e.g. when you are no longer using the import actions WAGTAILREDIRECTS_AUTO_CREATE = False -# Cooment out if you need to use timezone aware datetimes +# Comment out if you need to use timezone aware datetimes USE_TZ = False # Shell plus config diff --git a/wordpress.docker/docker-compose.yml b/wordpress.docker/docker-compose.yml index ca28b1e..5b57008 100644 --- a/wordpress.docker/docker-compose.yml +++ b/wordpress.docker/docker-compose.yml @@ -25,6 +25,7 @@ services: image: mysql:5.7 platform: linux/x86_64 restart: always + platform: linux/x86_64 env_file: - .env volumes: @@ -32,7 +33,7 @@ services: healthcheck: test: [ "CMD", "mysqladmin", "ping", "-h", "localhost" ] interval: 5s - timeout: 5s + timeout: 10s retries: 5 volumes: diff --git a/wp_connector/importer.py b/wp_connector/importer.py index 98a9e8d..5fd967d 100644 --- a/wp_connector/importer.py +++ b/wp_connector/importer.py @@ -83,6 +83,41 @@ class can be used for all models. # processing foreign keys here as we have access to all the data now self.process_one_to_many(self.one_to_many) self.process_many_to_many(self.many_to_many) + self.process_clean_fields(self.clean_fields) + + @staticmethod + def get_cleaned_data(process_clean_fields, item): + cleaned_data = [] + + def clean_content(content): + # currently just removes whitespace incl. newlines + # from the start and end of the content + # remove br tags if they are a top level tag using beautifulsoup + # remove empty paragraphs + + soup = bs(content, "html.parser") + tags = [] + for tag in soup.find_all("br", recursive=False): + tag.decompose() + + for tag in soup.find_all("p", recursive=False): + if not tag.text.strip(): + tag.decompose() + + for tag in soup.find_all(recursive=True): + tags.append(str(tag)) + + return "".join(tags) + + for field in process_clean_fields(): + for key, value in field.items(): + cleaned_data.append( + { + key: clean_content(jmespath.search(value, item)), + } + ) + + return cleaned_data @staticmethod def get_many_to_many_data(process_many_to_many_keys, item): @@ -221,3 +256,12 @@ def process_many_to_many(self, objects): for related_object in related_objects: getattr(obj, field).add(related_object) + + @staticmethod + def process_clean_fields(cleaned_fields): + sys.stdout.write("Processing clean fields...\n") + for obj in cleaned_fields: + for field in obj.cleaned_data: + for key, value in field.items(): + setattr(obj, key, value) + obj.save() diff --git a/wp_connector/models/page.py b/wp_connector/models/page.py index fbf413b..53740f3 100644 --- a/wp_connector/models/page.py +++ b/wp_connector/models/page.py @@ -81,7 +81,7 @@ def process_clean_fields(): """Clean the content.""" return [ { - "content": "wp_cleaned_content", + "wp_cleaned_content": "content.rendered", } ] diff --git a/wp_connector/models/post.py b/wp_connector/models/post.py index af30009..378532d 100644 --- a/wp_connector/models/post.py +++ b/wp_connector/models/post.py @@ -94,7 +94,7 @@ def process_clean_fields(): """Clean the content.""" return [ { - "content": "wp_cleaned_content", + "wp_cleaned_content": "content.rendered", } ]