From aaf34171f4127da0abb4722c1a2a8f5e5f3f8793 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Sat, 4 May 2024 10:06:21 +0200 Subject: [PATCH 01/19] Remove all legacy code --- markdown2confluence/main.py | 14 -- markdown2confluence/publisher.py | 420 +------------------------------ 2 files changed, 9 insertions(+), 425 deletions(-) diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index 20e73be..ae38fd5 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -22,20 +22,6 @@ def main(): markdown_source_ref=config.markdown_source_ref, confluence_ignorefile=config.confluence_ignorefile) - logging.basicConfig(level=logging.INFO) - logging.debug(config) - - pages = publisher.search_pages() - publisher.delete_pages(pages_id_list=pages) - - time.sleep(5) # Sleep for 5 seconds to allow the delete to fully complete - - # Publish the markdown files from the specified folder - publisher.publish_folder( - folder=config.markdown_folder, - parent_page_id=config.confluence_parent_page_id - ) - if __name__ == "__main__": main() diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index ee87fd4..478b72b 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -1,21 +1,4 @@ -import backoff -import json -import logging -import os -import random -import re -import requests -import string - from atlassian import Confluence -from markdown import markdown -from pathspec import PathSpec -from pathspec.patterns import GitWildMatchPattern -from requests.auth import HTTPBasicAuth - -# Set up basic configuration for logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) class Publisher: @@ -32,404 +15,19 @@ def __init__(self, url, username, password, space_id, self.parent_page_id = parent_page_id self.page_title_suffix = page_title_suffix self.page_label = page_label - - # TODO: remove and use confluence client - self.url = url - self.username = username - self.password = password self.markdown_folder = markdown_folder self.markdown_source_ref = markdown_source_ref - self.ignore_patterns = self.load_ignore_patterns(confluence_ignorefile) - - # def publish_page(self, title, content): - # title_with_suffix = f"{title}{self.page_title_suffix}" - # existing_page = self.confluence.get_page_by_title( - # space=self.space_id, - # title=title_with_suffix, - # expand='version' - # ) - # if existing_page: - # return - # # return self.update_page( - # # page_id=existing_page['id'], - # # title=title_with_suffix, - # # content=content - # # ) - # else: - # return self.confluence.create_page( - # space=self.space_id, - # title=title_with_suffix, - # body=content, - # parent_id=self.parent_page_id, - # type='page' - # ) - # - # def update_page(self, page_id, title, content): - # return self.confluence.update_page( - # page_id=page_id, - # title=title, - # body=content, - # type='page', - # ) - # - # def delete_page(self, page_id): - # return self.confluence.remove_page(page_id) - # - # - - def create_page(self, title, content, parent_page_id): - - # descripe json query - newPageJSONQueryString = """ - { - "type": "page", - "title": "DEFAULT PAGE TITLE", - "ancestors": [ - { - "id": 111 - } - ], - "space": { - "key": "DEFAULT KEY" - }, - "body": { - "storage": { - "value": "DEFAULT PAGE CONTENT", - "representation": "storage" - } - } - } - """ - - # load json from string - newPagejsonQuery = json.loads(newPageJSONQueryString) - - # the key of Confluence space for content publishing - newPagejsonQuery['space']['key'] = self.space_id - - # check of input of the ParentPageID - if parent_page_id is None: - # this is the root of out pages tree - newPagejsonQuery['ancestors'][0]['id'] = self.parent_page_id - else: - newPagejsonQuery['ancestors'][0]['id'] = str( - parent_page_id) # this is the branch of our tree - - newPagejsonQuery['title'] = title + " " + \ - self.page_title_suffix + \ - " #" + self.generate_random_string(length=3) - - # add content if the page from the input parameter - newPagejsonQuery['body']['storage']['value'] = ( - '' - 'Do not make changes here' - '' - '

This page is autogenerated. Make changes in the ' - f'GitHub repository

' - '
' - '
' + content - ) - - logging.info("Create new page: " + newPagejsonQuery['title']) - logging.debug("with content: " + - newPagejsonQuery['body']['storage']['value']) - logging.debug(json.dumps(newPagejsonQuery, indent=4, sort_keys=True)) - - # make call to create new page - logging.debug("Calling URL: " + self.url+"/content/") - - response = requests.post( - url=self.url+"/content/", - json=newPagejsonQuery, - auth=HTTPBasicAuth(self.username, self.password), - verify=True) - - logging.debug(response.status_code) - if response.status_code == 200: - logging.info("Created successfully") - logging.debug(json.dumps(json.loads( - response.text), indent=4, sort_keys=True)) - - # return new page id - logging.debug("Returning created page id: " + - json.loads(response.text)['id']) - return json.loads(response.text)['id'] - - # - # Function for searching pages with SEARCH TEST in the title - # - - def search_pages(self): - # make call using Confluence query language - # GET /rest/api/search?cql=text~%7B%22SEARCH%20PATTERN%22%7D+and+type=page+and+space=%2212345%22&limit=1000 HTTP/1.1" 200 - # "cqlQuery": "parent=301176119 and text~{\"SEARCH PATTERN\"} and type=page and space=\"12345\"" - - logging.debug("Calling URL: " + self.url + "/search?cql=parent=" + self.parent_page_id + - "+and+text~{\"" + self.page_title_suffix + - "\"}+and+type=page+and+space=\"" + - self.space_id + - "\"&limit=1000") - - def fatal_code(e): - return not 500 <= e.response.status_code < 600 - - # Exponential backoff for timeouts and server errors (500-599), fail on fatal errors - - @backoff.on_exception(backoff.expo, requests.exceptions.Timeout, max_tries=8) - @backoff.on_exception(backoff.expo, - requests.exceptions.RequestException, - giveup=fatal_code, - max_tries=4) - def get_request(url, auth): - response = requests.get( - url=url, - auth=auth, - verify=True - ) - # Raise an HTTPError for bad responses so it can be caught by backoff or fail the script - response.raise_for_status() - return response - - # Modify your existing code structure to use the get_request function - try: - response = get_request( - url=self.url + "/search?cql=text~{\"" + self.page_title_suffix + - "\"}+and+type=page+and+space=\"" + - self.space_id + - "\"&limit=1000", - auth=HTTPBasicAuth(self.username, self.password) - ) - except requests.exceptions.HTTPError as http_err: - logger.error(f"HTTP error occurred: {http_err}") - raise SystemExit(http_err) - except requests.exceptions.ConnectionError as conn_err: - logger.error(f"Connection error occurred: {conn_err}") - raise SystemExit(conn_err) - except requests.exceptions.Timeout as timeout_err: - # Should not reach here if `max_tries` has not been exceeded - logger.error( - f"Timeout error occurred after retries: {timeout_err}") - raise SystemExit(timeout_err) - except requests.exceptions.RequestException as req_err: - logger.error(f"Error making request: {req_err}") - raise SystemExit(req_err) - - logging.debug(response.status_code) - logging.debug(response.text) - logging.debug(json.dumps(json.loads( - response.text), indent=4, sort_keys=True)) - - # extract page's IDs from response JSON - results = json.loads(response.text) - foundPages = [] - - for result in results['results']: - foundPages.append(result['content']['id']) # add found page id - logging.info("Found page: " + result['content']['id'] + - " with title: " + result['content']['title']) - - logging.debug("Found pages in space " + self.space_id + " and parent page: " + - self.parent_page_id + " and search text: " + - self.page_title_suffix + ": " + str(foundPages)) - - return foundPages - - def delete_pages(self, pages_id_list): - - deletedPages = [] - - for page in pages_id_list: - logging.info("Delete page: " + str(page)) - logging.debug("Calling URL: " + - self.url + "/content/" + str(page)) - response = requests.delete( - url=self.url + "/content/" + str(page), - auth=HTTPBasicAuth(self.username, self.password), - verify=True) - logging.debug("Delete status code: " + str(response.status_code)) - if response.status_code == 204: - logging.info("Deleted successfully") - - return deletedPages - - def attach_file(self, page_id, attached_file): - """ - Attach a file to a Confluence page. - - Args: - page_id (str): ID of the Confluence page to attach the file to. - attached_file (file): The file to be attached. - - Returns: - str: The ID of the attached file or None if the attachment failed. - """ - - # Construct the API endpoint URL - api_url = f"{self.url}/content/{page_id}/child/attachment" - - # Log the API call - logging.debug(f"Calling URL: {api_url}") - - # Set up file and comment data, headers, and disable SSL verification - attached_file_structure = {'file': attached_file} - attached_values = {'comment': 'File was attached by the script'} - - # TODO: Why do we need nocheck? document properly or remove - attached_header = { - "Accept": "application/json", - "X-Atlassian-Token": "nocheck" # Disable token check to avoid 403 status code - } - - # Make the POST request to attach the file - response = requests.post( - url=api_url, - files=attached_file_structure, - data=attached_values, - auth=HTTPBasicAuth(self.username, self.password), - headers=attached_header, - verify=True # Not recommended in production - ) - - # Log the response status code - logging.debug(response.status_code) - - if response.status_code == 200: - # Log success and parse JSON response - logging.info("File was attached successfully") - response_data = json.loads(response.text) - logging.debug(json.dumps(response_data, indent=4, sort_keys=True)) - - # Extract and return the ID of the attached file - attached_file_id = response_data['results'][0]['id'] - logging.debug(f"Returning attached file id: {attached_file_id}") - return attached_file_id - else: - # Log failure and return None - logging.error("File has not been attached") - return None - - # Confluence pages need unique titles - add some random strings at the end - - def generate_random_string(self, length=10): - return ''.join(random.choices(string.ascii_letters + string.digits, k=length)) - - def load_ignore_patterns(self, path): - if not path: - return [] - - patterns = [] - try: - with open(path, 'r') as file: - patterns = [line.strip() for line in file if line.strip() - and not line.startswith('#')] - print("loaded ignorepatterns", patterns) - except FileNotFoundError: - print(f"Unable to locate {path}, no patterns to ignore.") - return patterns - - # NOTE: Move this to __init__ when refactoring to Publisher class - - # Function to check if a file matches any of the ignore patterns - - def is_ignored(self, file_path): - spec = PathSpec.from_lines(GitWildMatchPattern, self.ignore_patterns) - return spec.match_file(file_path) - - def folderContainsMarkdown(self, folder_path): - for entry in os.scandir(folder_path): - if entry.is_dir() and self.folderContainsMarkdown(entry.path): - return True - elif entry.is_file() and entry.name.endswith('.md'): - return True - return False - - def publish_folder(self, folder, parent_page_id): - logging.info(f"Publishing folder: {folder}") - for entry in os.scandir(folder): - if self.is_ignored(entry.path): - return - if entry.is_dir(): - # Recursively publish directories that contain markdown files - if self.folderContainsMarkdown(entry.path): - self.publish_directory(entry, parent_page_id) - - elif entry.is_file() and entry.name.endswith('.md'): - # Publish only markdown files - self.publish_file(entry, parent_page_id) - - elif entry.is_symlink(): - logging.info(f"Found symlink: {entry.path}") - - def publish_directory(self, entry, parent_page_id): - logging.info(f"Found directory: {entry.path}") - current_page_id = self.create_page( - title=entry.name, - content="", - parent_page_id=parent_page_id, - ) - self.publish_folder(entry.path, current_page_id) - - def publish_file(self, entry, parent_page_id): - logging.info(f"Found file: {entry.path}") - - if entry.name.lower().endswith('.md'): - self.process_markdown_file(entry, parent_page_id) - else: - logging.info( - f"File: {entry.path} is not a MD file. Publishing has been rejected.") - - def process_markdown_file(self, entry, parent_page_id): - new_file_content, files_to_upload = self.process_markdown_content( - entry.path) - - page_id_for_file_attaching = self.create_page( - title=entry.name, - content=markdown(new_file_content, extensions=[ - 'markdown.extensions.tables', 'fenced_code']), - parent_page_id=parent_page_id, - ) - - self.upload_attachments(files_to_upload, page_id_for_file_attaching) - - def process_markdown_content(self, file_path): - new_file_content = "" - files_to_upload = [] - - with open(file_path, 'r', encoding="utf-8") as md_file: - for line in md_file: - result = re.findall(r"\A!\[.*]\((?!http)(.*)\)", line) - if result: - result = result[0] - logging.debug(f"Found file for attaching: {result}") - print(f"Found file for attaching: {result}") - files_to_upload.append(result) - new_file_content += f" " - else: - new_file_content += line + # self.ignore_patterns = self.load_ignore_patterns(confluence_ignorefile) - return new_file_content, files_to_upload + def publish_page(self, title, content): + pass - def upload_attachments(self, files_to_upload, page_id_for_file_attaching): - if files_to_upload: - for file in files_to_upload: - print("file: ", file) + def update_page(self, page_id, title, content): + pass - # NOTE: Find the problem that this solves and fix it in a better way - if file.startswith('/'): - file = '.' + file + def delete_page(self, page_id): + pass - image_path = os.path.join( - self.markdown_folder, file) - if os.path.isfile(image_path): - logging.info( - f"Attaching file: {image_path} to the page: {page_id_for_file_attaching}") - with open(image_path, 'rb') as attached_file: - self.attach_file( - page_id=page_id_for_file_attaching, - attached_file=attached_file, - ) - else: - logging.error( - f"File: {image_path} not found. Nothing to attach") + def _attach_file(self, page_id, attached_file): + pass From eb72512140ed1095937daee6902e84fe2bbd5a41 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Sat, 4 May 2024 10:31:40 +0200 Subject: [PATCH 02/19] Add Architecture scaffold and versioning --- README.md | 6 +-- markdown2confluence/__init_.py | 2 +- markdown2confluence/config.py | 87 ++++++++++++++++--------------- markdown2confluence/confluence.py | 22 ++++---- markdown2confluence/logo.py | 19 +++++++ markdown2confluence/main.py | 36 ++++++------- markdown2confluence/publisher.py | 38 ++++---------- markdown2confluence/util.py | 48 +++++++++++++++++ markdown2confluence/version.py | 1 + setup.py | 2 +- tests/unit/test_converter.py | 0 11 files changed, 159 insertions(+), 102 deletions(-) create mode 100644 markdown2confluence/logo.py create mode 100644 markdown2confluence/util.py create mode 100644 markdown2confluence/version.py delete mode 100644 tests/unit/test_converter.py diff --git a/README.md b/README.md index 8b3bd33..8ec2ef4 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Convert your Markdown files into Confluence pages with ease using the `markdown2 ## Prerequisites -Before you get started, you will need the space ID of your Confluence space and the ID of the parent page where you want to create new pages. +Before you get started, ensure that you have Python version 3.9 or higher installed on your system. You will also need the space ID of your Confluence space and the ID of the parent page where you want to create new pages. ## Configuration @@ -16,14 +16,14 @@ Configure your environment with the following variables before running the scrip - `CONFLUENCE_SPACE_ID`: The key of the Confluence space where the pages will be created - `CONFLUENCE_PARENT_PAGE_ID`: The ID of the parent Confluence page under which new pages will be created - `MARKDOWN_FOLDER`: The path to the folder containing markdown files -- `MARKDOWN_SOURCE_REF`: The url of the markdown source, e.g. and url to your repo +- `MARKDOWN_SOURCE_REF`: The url of the markdown source, e.g., a URL to your repo - `CONFLUENCE_PAGE_LABEL`: A label to apply to the Confluence pages to manage them as a group Optionally, you can also set: - `CONFLUENCE_PAGE_TITLE_SUFFIX`: A suffix to append to the title of Confluence pages, e.g., '(autogenerated)' - `CONFLUENCE_IGNOREFILE`: Path to a file containing patterns to ignore when publishing -## Usage +## Usage (docker) To upload sample markdown files to Confluence, run the following Docker command, replacing `VAR1`, `VAR2`, etc., with your Confluence configuration values: diff --git a/markdown2confluence/__init_.py b/markdown2confluence/__init_.py index c13685a..9fd9deb 100644 --- a/markdown2confluence/__init_.py +++ b/markdown2confluence/__init_.py @@ -7,4 +7,4 @@ from .converter import Converter from .publisher import Publisher from .config import Config - +from .util import Logger diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index 8dae316..7dd951e 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -1,32 +1,49 @@ import argparse import os +from markdown2confluence.util import Logger + +logger = Logger(__name__).get_logger() + class Config: def __init__(self, args=None): if args is None: args = parse_args() - self.confluence_url = ( - args.confluence_url or - os.environ.get('CONFLUENCE_URL', '') - ).rstrip('/') - self.confluence_username = ( - args.confluence_username or - os.environ.get('CONFLUENCE_USERNAME') - ) - self.confluence_password = ( - args.confluence_password or - os.environ.get('CONFLUENCE_PASSWORD') - ) - self.confluence_space_id = ( - args.confluence_space_id or - os.environ.get('CONFLUENCE_SPACE_ID') - ) - self.confluence_parent_page_id = ( - args.confluence_parent_page_id or - os.environ.get('CONFLUENCE_PARENT_PAGE_ID') - ) + self.confluence = { + 'url': ( + args.confluence_url or + os.environ.get('CONFLUENCE_URL', '') + ).rstrip('/'), + 'username': ( + args.confluence_username or + os.environ.get('CONFLUENCE_USERNAME') + ), + 'password': ( + args.confluence_password or + os.environ.get('CONFLUENCE_PASSWORD') + ), + 'space_id': ( + args.confluence_space_id or + os.environ.get('CONFLUENCE_SPACE_ID') + ), + 'parent_page_id': ( + args.confluence_parent_page_id or + os.environ.get('CONFLUENCE_PARENT_PAGE_ID') + ), + 'page_title_suffix': ( + args.confluence_page_title_suffix or + os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or + '(autogenerated)' + ), + 'page_label': ( + args.confluence_page_label or + os.environ.get('CONFLUENCE_PAGE_LABEL') or + 'markdown2confluence' + ), + } + self.markdown_folder = ( args.markdown_folder or os.environ.get('MARKDOWN_FOLDER') or @@ -40,33 +57,19 @@ def __init__(self, args=None): args.confluence_ignorefile or os.environ.get('CONFLUENCE_IGNOREFILE') ) - self.confluence_page_title_suffix = ( - args.confluence_page_title_suffix or - os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or - '(autogenerated)' - ) - self.confluence_page_label = ( - args.confluence_page_label or - os.environ.get('CONFLUENCE_PAGE_LABEL') or - 'markdown2confluence' - ) + self.validate() + logger.debug("initialized logger with config:", self) def validate(self): missing_fields = [] - if not self.confluence_url: - missing_fields.append("confluence_url") - if not self.confluence_username: - missing_fields.append("confluence_username") - if not self.confluence_password: - missing_fields.append("confluence_password") - if not self.confluence_space_id: - missing_fields.append("confluence_space_id") - if not self.confluence_parent_page_id: - missing_fields.append("confluence_parent_page_id") - if not self.confluence_page_title_suffix: - missing_fields.append("confluence_page_title_suffix") + required_fields = ['url', 'username', 'password', 'space_id', + 'parent_page_id', 'page_title_suffix'] + + for key in required_fields: + if not self.confluence.get(key): + missing_fields.append("confluence_{}".format(key)) if missing_fields: raise ValueError("The following configuration fields are " diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index b046edd..5e83f27 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -1,18 +1,22 @@ +from markdown2confluence.util import Logger + +logger = Logger(__name__).get_logger() + class ConfluenceClient: def __init__(self, confluence_config: dict): - """Initialize with API configuration.""" - self.api_endpoint = confluence_config["api_endpoint"] + self.api_endpoint = confluence_config["url"] self.auth = (confluence_config["username"], confluence_config["password"]) - def create_or_update_page(self, title: str, html: str, parent_id=None, - space_key: str, labels=None) -> dict: - """Create or update a Confluence page, applying labels.""" - # Implementation for creating or updating a Confluence page + def create_or_update_page(self, title: str, content: str, parent_id=None): + pass + + def delete_page(self, page_id: str): + pass + + def publish_page(self, title: str, content: str, attachments: list[str]): pass - def delete_page(self, page_id: str) -> dict: - """Delete a Confluence page by ID.""" - # Implementation for deleting a Confluence page + def attach_file(self, page_id: int, attached_file: str): pass diff --git a/markdown2confluence/logo.py b/markdown2confluence/logo.py new file mode 100644 index 0000000..685a71c --- /dev/null +++ b/markdown2confluence/logo.py @@ -0,0 +1,19 @@ +LOGO_TEXT = """ + _ _ + _ __ ___ __ _ _ __| | ____| | _____ ___ __ +| '_ ` _ \\ / _` | '__| |/ / _` |/ _ \\ \\ /\\ / / '_ \\ +| | | | | | (_| | | | < (_| | (_) \\ V V /| | | | +|_| |_| |_|\\__,_|_| |_|\\_\\__,_|\\___/ \\_/\\_/ |_| |_| + + ____ + |___ \\ + __) | + / __/ + |_____| + + __ _ + ___ ___ _ __ / _| |_ _ ___ _ __ ___ ___ + / __/ _ \\| '_ \\| |_| | | | |/ _ \\ '_ \\ / __/ _ \\ +| (_| (_) | | | | _| | |_| | __/ | | | (_| __/ + \\___\\___/|_| |_|_| |_|\\__,_|\\___|_| |_|\\___\\___| +""" diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index ae38fd5..e03d51b 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -1,26 +1,24 @@ -import logging -import time - -from config import Config -from converter import Converter from publisher import Publisher +from config import Config +from util import Logger +from logo import LOGO_TEXT +import pkg_resources + +config = Config() +logger = Logger(__name__).get_logger() + + +def logo_and_version(): + print(LOGO_TEXT) + version = pkg_resources.get_distribution("markdown2confluence").version + print(f"Version: {version}\n") def main(): - config = Config() - - converter = Converter() - publisher = Publisher( - url=config.confluence_url, - username=config.confluence_username, - password=config.confluence_password, - space_id=config.confluence_space_id, - parent_page_id=config.confluence_parent_page_id, - page_title_suffix=config.confluence_page_title_suffix, - page_label=config.confluence_page_label, - markdown_folder=config.markdown_folder, - markdown_source_ref=config.markdown_source_ref, - confluence_ignorefile=config.confluence_ignorefile) + logo_and_version() + logger.info("Started markdown2confluence") + + Publisher().publish_folder(config.markdown_folder) if __name__ == "__main__": diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index 478b72b..5d30170 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -1,33 +1,17 @@ -from atlassian import Confluence +from markdown2confluence.config import Config +from markdown2confluence.util import Logger +from markdown2confluence.confluence import ConfluenceClient + +config = Config() +logger = Logger(__name__).get_logger() class Publisher: - def __init__(self, url, username, password, space_id, - parent_page_id, page_title_suffix, - page_label, markdown_folder, - markdown_source_ref, confluence_ignorefile): - self.confluence = Confluence( - url=url, - username=username, - password=password + def __init__(self, confluence: ConfluenceClient = None): + self.confluence = confluence or ConfluenceClient( + confluence_config=config.confluence ) - self.space_id = space_id - self.parent_page_id = parent_page_id - self.page_title_suffix = page_title_suffix - self.page_label = page_label - self.markdown_folder = markdown_folder - self.markdown_source_ref = markdown_source_ref - - # self.ignore_patterns = self.load_ignore_patterns(confluence_ignorefile) - - def publish_page(self, title, content): - pass - - def update_page(self, page_id, title, content): - pass - - def delete_page(self, page_id): - pass + logger.info("Initialized Publisher") - def _attach_file(self, page_id, attached_file): + def publish_folder(self, folder_path: str): pass diff --git a/markdown2confluence/util.py b/markdown2confluence/util.py new file mode 100644 index 0000000..a667a60 --- /dev/null +++ b/markdown2confluence/util.py @@ -0,0 +1,48 @@ +import os +from logging.handlers import RotatingFileHandler +import logging + + +class Logger: + def __init__(self, name: str, log_file: str = 'markdown2confluence.log', + level: int = logging.INFO): + """ + Initialize the Logger with a specified name and log file. + + :param name: Name of the logger, usually __name__ is passed + to get the module's name. + :param log_file: File path for the log file. + :param level: Logging level, default is logging.INFO. + """ + self.logger = logging.getLogger(name) + self.logger.setLevel(level) + + # Create log directory if it doesn't exist + log_dir = os.path.dirname(log_file) + if log_dir and not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Create a file handler which logs even debug messages + file_handler = RotatingFileHandler( + log_file, maxBytes=1024*1024*5, backupCount=5) + file_handler.setLevel(level) + + # Create a console handler with a higher log level + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.ERROR) + + # Create formatter and add it to the handlers + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + file_handler.setFormatter(formatter) + console_handler.setFormatter(formatter) + + # Add the handlers to the logger + self.logger.addHandler(file_handler) + self.logger.addHandler(console_handler) + + def get_logger(self): + """ + Return the configured logger. + """ + return self.logger diff --git a/markdown2confluence/version.py b/markdown2confluence/version.py new file mode 100644 index 0000000..d3ec452 --- /dev/null +++ b/markdown2confluence/version.py @@ -0,0 +1 @@ +__version__ = "0.2.0" diff --git a/setup.py b/setup.py index 8960abb..8a5bfcc 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='markdown2confluence', - version='0.1.0-alpha', + version='0.3.0-alpha', packages=find_packages(), install_requires=[ # dependencies diff --git a/tests/unit/test_converter.py b/tests/unit/test_converter.py deleted file mode 100644 index e69de29..0000000 From 6ef26ab31767b0ae71eaa8c55540f0aacf800d5f Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Sat, 4 May 2024 12:10:54 +0200 Subject: [PATCH 03/19] Add .gptcontext --- .gptcontext | 110 +++++++++++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 61 deletions(-) diff --git a/.gptcontext b/.gptcontext index f9da741..01ba41c 100644 --- a/.gptcontext +++ b/.gptcontext @@ -2,23 +2,13 @@ Additional context is provided below. Preferences for python code: - adhere to common style conventions, e.g. PEP8 -- keep lines under 80 characters long +- you MUST keep lines under 80 characters long Markdown2confluence pushes a folder containing markdown files and pushes them to confluence, with a page structure like the file and folder structure of the markdown files, and ignoring any non-markdown files. Required behavior: -All pages managed by markdown2confluence contains $CONFLUENCE_PAGE_TITLE_SUFFIX, e.g. '(autogenerated)'. New pages are created with this suffix, and on subsequent runs any pages with the suffix (or label, TBD) are overwritten or deleted. -Depending on how confluence labels work it might be best to use labels instead. If using labels, refuse to delete any pages that does not have the page title suffix. -Any markdown that contains full or relative links to local media files should be published as pages with attached media. Relative links in markdown to local media are resolved from the location of the markdown file. Full-path links in markdown are resolved from the $MARKDOWN_FOLDER - - -Currently I am working on: -- Publisher class in publish.py contains the old code for now, I am moving - functionality to the other classes. -- Change from directly using requests to using the confluence client from - atlassian -- Use labels instead of only relying on the suffix (previously called search - pattern) +- All pages managed by markdown2confluence contains a suffix, e.g. '(autogenerated)'. New pages are created with this suffix, and on subsequent runs any pages with the suffix (or label, TBD) are overwritten or deleted. Depending on how confluence labels work it might be best to use labels instead. If using labels, refuse to delete any pages that does not have the page title suffix. +- Any markdown that contains full or relative links to local media files should be published as pages with attached media. Relative links in markdown to local media are resolved from the location of the markdown file. Full-path links in markdown are resolved from the $MARKDOWN_FOLDER file structure: @@ -34,7 +24,8 @@ markdown2confluence/ │ ├── main.py │ ├── confluence.py │ ├── config.py -│ ├── file_manager.py +│ ├── parser.py +│ ├── util.py │ └── publisher.py ├── README.md ├── requirements.txt @@ -48,7 +39,7 @@ markdown2confluence/ │ └── test_integration.py └── unit ├── __init__.py - ├── test_file_manager.py + ├── test_parser.py ├── test_confluence.py └── test_publisher.py @@ -79,63 +70,60 @@ CONFLUENCE_IGNOREFILE #### Components and Their Key Interfaces -1. **ConfluenceClient** - -Responsible for direct interactions with the Confluence API, handling operations like page creation, updates, deletion, and labeling with retries and backoff for robustness. - -```python -class ConfluenceClient: - def __init__(self, confluence_config: dict): - """Initialize with API configuration.""" - - def create_or_update_page(self, title: str, html: str, parent_id=None, space_key: str, labels=None) -> dict: - """Create or update a Confluence page, applying labels.""" - - def delete_page(self, page_id: str) -> dict: - """Delete a Confluence page by ID.""" -``` - -2. **Publisher** +1. **Publisher** -Orchestrates the conversion of Markdown to HTML and the subsequent publishing to Confluence, respecting the original directory structure and managing page relationships. +Abstract Publisher class for publishing a content tree, respecting the ContentTree structure and managing page relationships. ```python class Publisher: - def __init__(self, confluence_client: ConfluenceClient, source_directory: str, space_key: str): - """Setup with Confluence client, source directory, and target space key.""" - - def publish(self): - """Main method to start the publishing process.""" - - def traverse_directory(self, directory: str, parent_id=None): - """Recursively traverse directories, converting and uploading Markdown files.""" + @abstractmethod + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + pass + + def publish_content(self, content_tree: ContentTree): + """ + Traverse a content tree and call publish_node on each element. + """ + pass ``` -3. **FileManager** (unchanged, conceptual) +2. **ConfluencePublisher** -Handles file reading and potentially logging or other file outputs, and maybe traversing the file system +Specialized publisher for confluence, implements the publish_node function responsible for creating pages with labels etc in confluence ```python -class FileManager: - def read_file(self, path: str) -> str: - """Read the content of a file.""" -``` - -### Workflow Overview with Snippets +class ConfluencePublisher(Publisher): + def __init__(self, confluence: Confluence = None): + pass -- The process starts with `Publisher`, which is initialized with necessary configurations and an instance of `ConfluenceClient`. - -```python -publisher = Publisher(confluence_client=ConfluenceClient(confluence_config), source_directory="path/to/markdown", space_key="SPACEKEY") -publisher.publish() + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + pass ``` -- `Publisher.publish()` begins the process, invoking `traverse_directory()` to walk through the directory structure, processing each Markdown file by converting it to HTML. - -- For each processed file, `Publisher` uses `ConfluenceClient.create_or_update_page()` to either create a new page or update an existing one in Confluence, applying a predefined label to mark the page as managed by `markdown2confluence`. +3. **Parser** -- Should a page need to be deleted or labels added, `Publisher` utilizes other methods of `ConfluenceClient` like `delete_page()` and maybe `add_labels_to_page()`, ensuring the Confluence space remains synchronized with the source content. +Responsible for parsing the source files from e.g. the file system. -### Conclusion - -This architecture, enriched with interface snippets, outlines a clear, modular approach to converting and managing Markdown content within Confluence, ensuring scalability and maintainability through well-defined responsibilities and robust Confluence API interactions. +```python +@dataclass +class ContentNode: + name: str + content: [str] | None = None + metadata: [dict] | None = None + parent: ['ContentNode'] | None = None + children: dict[str, 'ContentNode'] = field(default_factory=dict) + +@dataclass +class ContentTree: + root: ContentNode = field(default_factory=lambda: ContentNode('root')) + +class Parser(ABC): + @abstractmethod + def parse_directory(self, directory: str) -> ContentTree: + pass + + +class MarkdownParser(Parser): + def parse_directory(self, directory: str) -> ContentTree: + pass +``` From ed0423de3abb0bdcd83bcaf50fa562c03f532d55 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Sun, 5 May 2024 16:44:27 +0200 Subject: [PATCH 04/19] Add pipenv files (Pipfile, Pipfile.lock) and generated requirements.txt --- Dockerfile | 2 +- Pipfile | 15 ++ Pipfile.lock | 386 ++++++++++++++++++++++++++++ README.md | 2 +- markdown2confluence/file_manager.py | 5 - requirements.txt | 34 +-- setup.py | 2 +- 7 files changed, 421 insertions(+), 25 deletions(-) create mode 100644 Pipfile create mode 100644 Pipfile.lock delete mode 100644 markdown2confluence/file_manager.py diff --git a/Dockerfile b/Dockerfile index 20d9dd6..ca7e19c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim +FROM python:3.11-slim WORKDIR /app diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..92de907 --- /dev/null +++ b/Pipfile @@ -0,0 +1,15 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +atlassian-python-api = "*" + +[dev-packages] +setuptools = "*" +pytest-watch = "*" +pytest = "*" + +[requires] +python_version = "3.11" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..052b2f0 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,386 @@ +{ + "_meta": { + "hash": { + "sha256": "ea9c81aba14dc5b093345a799263a3d9ca947578f13f5108be8cc39e32d5dbea" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "atlassian-python-api": { + "hashes": [ + "sha256:47ac76a171f08537cff64253d1b49a016dc6636dfbba324944c01397d755391c", + "sha256:e6503b2bfeedf100fcabc1d541718a8ab5e6fd757164438fcf4948e6ecea12e4" + ], + "index": "pypi", + "version": "==3.41.11" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", + "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==4.12.3" + }, + "certifi": { + "hashes": [ + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.2.2" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "deprecated": { + "hashes": [ + "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c", + "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.2.14" + }, + "idna": { + "hashes": [ + "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", + "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" + ], + "markers": "python_version >= '3.5'", + "version": "==3.7" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "oauthlib": { + "hashes": [ + "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", + "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918" + ], + "markers": "python_version >= '3.6'", + "version": "==3.2.2" + }, + "requests": { + "hashes": [ + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + ], + "markers": "python_version >= '3.7'", + "version": "==2.31.0" + }, + "requests-oauthlib": { + "hashes": [ + "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", + "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9" + ], + "markers": "python_version >= '3.4'", + "version": "==2.0.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "soupsieve": { + "hashes": [ + "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", + "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7" + ], + "markers": "python_version >= '3.8'", + "version": "==2.5" + }, + "urllib3": { + "hashes": [ + "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", + "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.1" + }, + "wrapt": { + "hashes": [ + "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc", + "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", + "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", + "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e", + "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca", + "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0", + "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb", + "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487", + "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40", + "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", + "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", + "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202", + "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41", + "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", + "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", + "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664", + "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", + "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", + "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00", + "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", + "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", + "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267", + "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", + "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966", + "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", + "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228", + "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72", + "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", + "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292", + "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0", + "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0", + "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", + "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c", + "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5", + "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f", + "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", + "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", + "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2", + "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593", + "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39", + "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", + "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf", + "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf", + "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", + "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c", + "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c", + "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f", + "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440", + "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465", + "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136", + "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b", + "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8", + "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", + "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8", + "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6", + "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e", + "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f", + "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c", + "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e", + "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", + "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2", + "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", + "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35", + "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", + "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3", + "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537", + "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", + "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d", + "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a", + "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4" + ], + "markers": "python_version >= '3.6'", + "version": "==1.16.0" + } + }, + "develop": { + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "version": "==0.6.2" + }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "packaging": { + "hashes": [ + "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", + "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" + ], + "markers": "python_version >= '3.7'", + "version": "==24.0" + }, + "pluggy": { + "hashes": [ + "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", + "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" + ], + "markers": "python_version >= '3.8'", + "version": "==1.5.0" + }, + "pytest": { + "hashes": [ + "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233", + "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f" + ], + "index": "pypi", + "version": "==8.2.0" + }, + "pytest-watch": { + "hashes": [ + "sha256:06136f03d5b361718b8d0d234042f7b2f203910d8568f63df2f866b547b3d4b9" + ], + "index": "pypi", + "version": "==4.2.0" + }, + "setuptools": { + "hashes": [ + "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987", + "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32" + ], + "index": "pypi", + "version": "==69.5.1" + }, + "watchdog": { + "hashes": [ + "sha256:11e12fafb13372e18ca1bbf12d50f593e7280646687463dd47730fd4f4d5d257", + "sha256:2895bf0518361a9728773083908801a376743bcc37dfa252b801af8fd281b1ca", + "sha256:39cb34b1f1afbf23e9562501673e7146777efe95da24fab5707b88f7fb11649b", + "sha256:45cc09cc4c3b43fb10b59ef4d07318d9a3ecdbff03abd2e36e77b6dd9f9a5c85", + "sha256:4986db5e8880b0e6b7cd52ba36255d4793bf5cdc95bd6264806c233173b1ec0b", + "sha256:5369136a6474678e02426bd984466343924d1df8e2fd94a9b443cb7e3aa20d19", + "sha256:557ba04c816d23ce98a06e70af6abaa0485f6d94994ec78a42b05d1c03dcbd50", + "sha256:6a4db54edea37d1058b08947c789a2354ee02972ed5d1e0dca9b0b820f4c7f92", + "sha256:6a80d5cae8c265842c7419c560b9961561556c4361b297b4c431903f8c33b269", + "sha256:6a9c71a0b02985b4b0b6d14b875a6c86ddea2fdbebd0c9a720a806a8bbffc69f", + "sha256:6c47bdd680009b11c9ac382163e05ca43baf4127954c5f6d0250e7d772d2b80c", + "sha256:6e949a8a94186bced05b6508faa61b7adacc911115664ccb1923b9ad1f1ccf7b", + "sha256:73c7a935e62033bd5e8f0da33a4dcb763da2361921a69a5a95aaf6c93aa03a87", + "sha256:76ad8484379695f3fe46228962017a7e1337e9acadafed67eb20aabb175df98b", + "sha256:8350d4055505412a426b6ad8c521bc7d367d1637a762c70fdd93a3a0d595990b", + "sha256:87e9df830022488e235dd601478c15ad73a0389628588ba0b028cb74eb72fed8", + "sha256:8f9a542c979df62098ae9c58b19e03ad3df1c9d8c6895d96c0d51da17b243b1c", + "sha256:8fec441f5adcf81dd240a5fe78e3d83767999771630b5ddfc5867827a34fa3d3", + "sha256:9a03e16e55465177d416699331b0f3564138f1807ecc5f2de9d55d8f188d08c7", + "sha256:ba30a896166f0fee83183cec913298151b73164160d965af2e93a20bbd2ab605", + "sha256:c17d98799f32e3f55f181f19dd2021d762eb38fdd381b4a748b9f5a36738e935", + "sha256:c522392acc5e962bcac3b22b9592493ffd06d1fc5d755954e6be9f4990de932b", + "sha256:d0f9bd1fd919134d459d8abf954f63886745f4660ef66480b9d753a7c9d40927", + "sha256:d18d7f18a47de6863cd480734613502904611730f8def45fc52a5d97503e5101", + "sha256:d31481ccf4694a8416b681544c23bd271f5a123162ab603c7d7d2dd7dd901a07", + "sha256:e3e7065cbdabe6183ab82199d7a4f6b3ba0a438c5a512a68559846ccb76a78ec", + "sha256:eed82cdf79cd7f0232e2fdc1ad05b06a5e102a43e331f7d041e5f0e0a34a51c4", + "sha256:f970663fa4f7e80401a7b0cbeec00fa801bf0287d93d48368fc3e6fa32716245", + "sha256:f9b2fdca47dc855516b2d66eef3c39f2672cbf7e7a42e7e67ad2cbfcd6ba107d" + ], + "markers": "python_version >= '3.8'", + "version": "==4.0.0" + } + } +} diff --git a/README.md b/README.md index 8ec2ef4..4368339 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Convert your Markdown files into Confluence pages with ease using the `markdown2 ## Prerequisites -Before you get started, ensure that you have Python version 3.9 or higher installed on your system. You will also need the space ID of your Confluence space and the ID of the parent page where you want to create new pages. +Before you get started, ensure that you have Python version 3.11 or higher installed on your system. You will also need the space ID of your Confluence space and the ID of the parent page where you want to create new pages. ## Configuration diff --git a/markdown2confluence/file_manager.py b/markdown2confluence/file_manager.py deleted file mode 100644 index 8e3b1e4..0000000 --- a/markdown2confluence/file_manager.py +++ /dev/null @@ -1,5 +0,0 @@ -class FileManager: - def read_file(self, path: str) -> str: - """Read the content of a file.""" - with open(path, 'r', encoding='utf-8') as file: - return file.read() diff --git a/requirements.txt b/requirements.txt index c185131..426def3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,17 @@ -atlassian-python-api==3.41.11 -backoff==2.2.1 -beautifulsoup4==4.12.3 -certifi==2024.2.2 -charset-normalizer==3.3.2 -Deprecated==1.2.14 -idna==3.7 -jmespath==1.0.1 -Markdown==3.6 -oauthlib==3.2.2 -pathspec==0.12.1 -requests==2.31.0 -requests-oauthlib==2.0.0 -six==1.16.0 -soupsieve==2.5 -urllib3==2.2.1 -wrapt==1.16.0 +-i https://pypi.org/simple +atlassian-python-api==3.41.13 +beautifulsoup4==4.12.3 ; python_full_version >= '3.6.0' +certifi==2024.2.2 ; python_version >= '3.6' +charset-normalizer==3.3.2 ; python_full_version >= '3.7.0' +deprecated==1.2.14 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +idna==3.7 ; python_version >= '3.5' +jmespath==1.0.1 ; python_version >= '3.7' +markdown==3.6 +-e file:///home/otto/dev/gh/innofactororg/markdown2confluence +oauthlib==3.2.2 ; python_version >= '3.6' +requests==2.32.2 ; python_version >= '3.8' +requests-oauthlib==2.0.0 ; python_version >= '3.4' +six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +soupsieve==2.5 ; python_version >= '3.8' +urllib3==2.2.1 ; python_version >= '3.8' +wrapt==1.16.0 ; python_version >= '3.6' diff --git a/setup.py b/setup.py index 8a5bfcc..4bdfbdd 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='markdown2confluence', - version='0.3.0-alpha', + version='0.2.0-rc.5', packages=find_packages(), install_requires=[ # dependencies From ab3f5f37443507105cf43bfa1f4f11d5eeac569c Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Sun, 5 May 2024 16:46:03 +0200 Subject: [PATCH 05/19] Add parser.py, publisher scaffold, pip install -e --- Pipfile | 1 + Pipfile.lock | 8 ++- markdown2confluence/main.py | 6 +- markdown2confluence/parser.py | 96 ++++++++++++++++++++++++++++++++ markdown2confluence/publisher.py | 11 +++- markdown2confluence/util.py | 13 +++-- tests/unit/test_parser.py | 78 ++++++++++++++++++++++++++ 7 files changed, 202 insertions(+), 11 deletions(-) create mode 100644 markdown2confluence/parser.py create mode 100644 tests/unit/test_parser.py diff --git a/Pipfile b/Pipfile index 92de907..bb72659 100644 --- a/Pipfile +++ b/Pipfile @@ -5,6 +5,7 @@ name = "pypi" [packages] atlassian-python-api = "*" +markdown2confluence = {file = ".", editable = true} [dev-packages] setuptools = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 052b2f0..33f9c8c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "ea9c81aba14dc5b093345a799263a3d9ca947578f13f5108be8cc39e32d5dbea" + "sha256": "018c8b383747f100e61899863605378863f1c783d3e36b8ca54c279adcaa2de4" }, "pipfile-spec": 6, "requires": { @@ -160,6 +160,10 @@ "markers": "python_version >= '3.7'", "version": "==1.0.1" }, + "markdown2confluence": { + "editable": true, + "file": "." + }, "oauthlib": { "hashes": [ "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", @@ -330,6 +334,7 @@ "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f" ], "index": "pypi", + "markers": "python_version >= '3.8'", "version": "==8.2.0" }, "pytest-watch": { @@ -345,6 +350,7 @@ "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32" ], "index": "pypi", + "markers": "python_version >= '3.8'", "version": "==69.5.1" }, "watchdog": { diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index e03d51b..eac71b4 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -5,7 +5,7 @@ import pkg_resources config = Config() -logger = Logger(__name__).get_logger() +logger = Logger("main").get_logger() def logo_and_version(): @@ -17,8 +17,8 @@ def logo_and_version(): def main(): logo_and_version() logger.info("Started markdown2confluence") - - Publisher().publish_folder(config.markdown_folder) + logger.info("Publishing folder %s", config.markdown_folder) + Publisher().publish_directory(config.markdown_folder) if __name__ == "__main__": diff --git a/markdown2confluence/parser.py b/markdown2confluence/parser.py new file mode 100644 index 0000000..d19c585 --- /dev/null +++ b/markdown2confluence/parser.py @@ -0,0 +1,96 @@ +import os + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from collections.abc import Iterator + + +@dataclass +class ContentNode: + name: str + content: str | None = None + metadata: dict | None = None + parent: 'ContentNode | None' = None + children: dict[str, 'ContentNode'] = field(default_factory=dict) + + def add_child(self, node: 'ContentNode'): + self.children[node.name] = node + + def get_child(self, name: str) -> 'ContentNode | None': + return self.children.get(name) + + def is_leaf(self) -> bool: + return not self.children + + def __str__(self, level: int = 0) -> str: + ret = "\t" * level + repr(self.name) + "\n" + for child in self.children.values(): + ret += child.__str__(level + 1) + return ret + + +@dataclass +class ContentTree: + root: ContentNode = field(default_factory=lambda: ContentNode('root')) + + # Example usage: + # ContentTree().add_node( + # path_list=['folder1', 'folder2', 'file.md'], + # content='file content here', + # metadata={'date': '2023-04-01'} + # ) + def add_node(self, path_list: list, content: str | None = None, + metadata: dict | None = None): + current_node = self.root + for part in path_list: + next_node = current_node.get_child(part) + if not next_node: + next_node = ContentNode(name=part, parent=current_node) + current_node.add_child(next_node) + current_node = next_node + current_node.content = content + current_node.metadata = metadata + + def find_node(self, path_list: list) -> ContentNode | None: + current_node = self.root + for part in path_list: + current_node = current_node.get_child(part) + if current_node is None: + return None + return current_node + + def __str__(self) -> str: + return str(self.root) + + +class Parser(ABC): + @abstractmethod + def parse_directory(self, directory: str) -> ContentTree: + pass + + +class MarkdownParser(Parser): + + def parse_directory(self, directory: str) -> ContentTree: + content_tree = ContentTree() + for file_path in self._get_markdown_files(directory): + content = self._read_file_content(file_path) + path_list = self._get_relative_path_as_list(file_path, directory) + content_tree.add_node(path_list=path_list, content=content) + return content_tree + + def _get_markdown_files(self, directory: str) -> Iterator[str]: + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.md'): + yield os.path.join(root, file) + + def _read_file_content(self, file_path: str) -> str: + if not os.path.exists(file_path): + raise FileNotFoundError(f"The file {file_path} was not found.") + with open(file_path, 'r', encoding='utf-8') as md_file: + return md_file.read() + + def _get_relative_path_as_list( + self, file_path: str, base_directory: str) -> list[str]: + return os.path.relpath(file_path, base_directory).split(os.sep) diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index 5d30170..9827f8c 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -1,17 +1,22 @@ from markdown2confluence.config import Config from markdown2confluence.util import Logger from markdown2confluence.confluence import ConfluenceClient +from markdown2confluence.parser import MarkdownParser as Parser config = Config() logger = Logger(__name__).get_logger() class Publisher: - def __init__(self, confluence: ConfluenceClient = None): + def __init__(self, confluence: ConfluenceClient | None = None): self.confluence = confluence or ConfluenceClient( confluence_config=config.confluence ) logger.info("Initialized Publisher") - def publish_folder(self, folder_path: str): - pass + def publish_directory(self, directory: str): + parser = Parser() + content_tree = parser.parse_directory(directory) + logger.debug("ContentTree:\n%s", content_tree) + + # TODO: traverse tree and publish with ConfluenceClient.publish_page diff --git a/markdown2confluence/util.py b/markdown2confluence/util.py index a667a60..a98a7a3 100644 --- a/markdown2confluence/util.py +++ b/markdown2confluence/util.py @@ -4,8 +4,12 @@ class Logger: - def __init__(self, name: str, log_file: str = 'markdown2confluence.log', - level: int = logging.INFO): + def __init__( + self, + name: str, + log_file: str = 'markdown2confluence.log', + level: int = logging.DEBUG if os.getenv("DEBUG") else logging.ERROR + ): """ Initialize the Logger with a specified name and log file. @@ -27,9 +31,10 @@ def __init__(self, name: str, log_file: str = 'markdown2confluence.log', log_file, maxBytes=1024*1024*5, backupCount=5) file_handler.setLevel(level) - # Create a console handler with a higher log level console_handler = logging.StreamHandler() - console_handler.setLevel(logging.ERROR) + console_handler.setLevel(level) + + print(logging.getLevelName(console_handler.level)) # Create formatter and add it to the handlers formatter = logging.Formatter( diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py new file mode 100644 index 0000000..4c13b87 --- /dev/null +++ b/tests/unit/test_parser.py @@ -0,0 +1,78 @@ +import unittest +from unittest import mock +from markdown2confluence.parser import MarkdownParser + + +class TestMarkdownParser(unittest.TestCase): + + def setUp(self): + self.parser = MarkdownParser() + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_invalid_path( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.side_effect = FileNotFoundError + with self.assertRaises(FileNotFoundError): + self.parser.parse_directory('non_existing_directory') + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_empty( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter([]) + tree = self.parser.parse_directory('/emptydir') + self.assertEqual(tree.root.name, 'root') + self.assertEqual(tree.root.children, {}) + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_with_non_markdown_files( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter( + [('/dir_with_non_md_files', [], ['test.txt'])]) + tree = self.parser.parse_directory('/dir_with_non_md_files') + self.assertEqual(tree.root.children, {}) + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_with_nested_structure( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter([ + ('/nesteddir', ['nested'], []), + ('/nesteddir/nested', [], ['test.md']) + ]) + tree = self.parser.parse_directory('/nesteddir') + self.assertIn('nested', tree.root.children) + self.assertIn('test.md', tree.root.children['nested'].children) + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_with_multiple_markdown_files( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter([ + ('/dir_with_multiple_md', [], ['test1.md', 'test2.md']) + ]) + tree = self.parser.parse_directory('/dir_with_multiple_md') + self.assertIn('test1.md', tree.root.children) + self.assertIn('test2.md', tree.root.children) + self.assertEqual( + tree.root.children['test1.md'].content, + "Content of /dir_with_multiple_md/test1.md") + self.assertEqual( + tree.root.children['test2.md'].content, + "Content of /dir_with_multiple_md/test2.md") + + +if __name__ == '__main__': + unittest.main() From 90fd2e05e4b067361a6eb24d9b92ac8cc2820e38 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Sun, 19 May 2024 20:46:35 +0200 Subject: [PATCH 06/19] Pull out ContentTree, abstract base class for Publisher --- .gptcontext | 63 +++++++++++--- markdown2confluence/config.py | 62 +++++++------- markdown2confluence/confluence.py | 45 ++++++---- markdown2confluence/content_tree.py | 59 +++++++++++++ markdown2confluence/main.py | 24 ++++-- markdown2confluence/parser.py | 59 +------------ markdown2confluence/publisher.py | 54 +++++++++--- markdown2confluence/util.py | 2 - tests/unit/test_confluence_publish_content.py | 80 +++++++++++++++++ tests/unit/test_publisher.py | 85 +++++++++++++++++++ 10 files changed, 392 insertions(+), 141 deletions(-) create mode 100644 markdown2confluence/content_tree.py create mode 100644 tests/unit/test_confluence_publish_content.py diff --git a/.gptcontext b/.gptcontext index 01ba41c..93e37f1 100644 --- a/.gptcontext +++ b/.gptcontext @@ -24,8 +24,10 @@ markdown2confluence/ │ ├── main.py │ ├── confluence.py │ ├── config.py +│ ├── content_tree.py │ ├── parser.py │ ├── util.py +│ ├── version.py │ └── publisher.py ├── README.md ├── requirements.txt @@ -80,6 +82,22 @@ class Publisher: def publish_node(self, node: ContentNode, parent_id: str | None) -> str: pass + def pre_publish_hook(self): + """ + Optional step for actions to perform before publishing, such as + fetching/deleting previously published resources. + Can be overridden by subclasses. + """ + pass + + def post_publish_hook(self): + """ + Optional step for actions to perform after publishing, such as + cleaning up resources or performing additional logging. + Can be overridden by subclasses. + """ + pass + def publish_content(self, content_tree: ContentTree): """ Traverse a content tree and call publish_node on each element. @@ -89,14 +107,29 @@ class Publisher: 2. **ConfluencePublisher** -Specialized publisher for confluence, implements the publish_node function responsible for creating pages with labels etc in confluence +Specialized publisher for confluence, implements the publish_node function responsible for creating/updating pages with labels etc in confluence ```python class ConfluencePublisher(Publisher): def __init__(self, confluence: Confluence = None): pass + def pre_publish_hook(self): + """ + Specialized for this subclass. + Fetch all pages matching space, label and suffix + """ + + def post_publish_hook(self): + """ + Specialized for this subclass. + Delete pages not in the ContentTree + """ + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + """ + Create or update pages, including attachments, ensuring labels on newly created pages. + """ pass ``` @@ -104,6 +137,22 @@ class ConfluencePublisher(Publisher): Responsible for parsing the source files from e.g. the file system. +```python +class Parser(ABC): + @abstractmethod + def parse_directory(self, directory: str) -> ContentTree: + pass + + +class MarkdownParser(Parser): + def parse_directory(self, directory: str) -> ContentTree: + pass +``` + +4. **ContentTree** + +Defines the shared data structure for content between Parser and Publisher + ```python @dataclass class ContentNode: @@ -116,14 +165,4 @@ class ContentNode: @dataclass class ContentTree: root: ContentNode = field(default_factory=lambda: ContentNode('root')) - -class Parser(ABC): - @abstractmethod - def parse_directory(self, directory: str) -> ContentTree: - pass - - -class MarkdownParser(Parser): - def parse_directory(self, directory: str) -> ContentTree: - pass -``` +`` diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index 7dd951e..0746f60 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -11,38 +11,36 @@ def __init__(self, args=None): if args is None: args = parse_args() - self.confluence = { - 'url': ( - args.confluence_url or - os.environ.get('CONFLUENCE_URL', '') - ).rstrip('/'), - 'username': ( - args.confluence_username or - os.environ.get('CONFLUENCE_USERNAME') - ), - 'password': ( - args.confluence_password or - os.environ.get('CONFLUENCE_PASSWORD') - ), - 'space_id': ( - args.confluence_space_id or - os.environ.get('CONFLUENCE_SPACE_ID') - ), - 'parent_page_id': ( - args.confluence_parent_page_id or - os.environ.get('CONFLUENCE_PARENT_PAGE_ID') - ), - 'page_title_suffix': ( - args.confluence_page_title_suffix or - os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or - '(autogenerated)' - ), - 'page_label': ( - args.confluence_page_label or - os.environ.get('CONFLUENCE_PAGE_LABEL') or - 'markdown2confluence' - ), - } + self.confluence_url = ( + args.confluence_url or + os.environ.get('CONFLUENCE_URL', '') + ).rstrip('/') + self.confluence_username = ( + args.confluence_username or + os.environ.get('CONFLUENCE_USERNAME') + ) + self.confluence_password = ( + args.confluence_password or + os.environ.get('CONFLUENCE_PASSWORD') + ) + self.confluence_space_id = ( + args.confluence_space_id or + os.environ.get('CONFLUENCE_SPACE_ID') + ) + self.confluence_parent_page_id = ( + args.confluence_parent_page_id or + os.environ.get('CONFLUENCE_PARENT_PAGE_ID') + ) + self.confluence_page_title_suffix = ( + args.confluence_page_title_suffix or + os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or + '(autogenerated)' + ) + self.confluence_page_label = ( + args.confluence_page_label or + os.environ.get('CONFLUENCE_PAGE_LABEL') or + 'markdown2confluence' + ) self.markdown_folder = ( args.markdown_folder or diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index 5e83f27..0cd3dd0 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -1,22 +1,37 @@ -from markdown2confluence.util import Logger +from atlassian import Confluence -logger = Logger(__name__).get_logger() +from markdown2confluence.util import Logger +from markdown2confluence.config import Config +from markdown2confluence.publisher import Publisher +from markdown2confluence.content_tree import ContentNode -class ConfluenceClient: - def __init__(self, confluence_config: dict): - self.api_endpoint = confluence_config["url"] - self.auth = (confluence_config["username"], - confluence_config["password"]) +logger = Logger(__name__).get_logger() - def create_or_update_page(self, title: str, content: str, parent_id=None): - pass - def delete_page(self, page_id: str): - pass +class ConfluencePublisher(Publisher): + def __init__(self, confluence: Confluence | None = None): + self.config = Config() + self.confluence = confluence or Confluence( + url=self.config.confluence_url, + username=self.config.confluence_username, + password=self.config.confluence_password, + cloud=True) + logger.info("Initialized Publisher") - def publish_page(self, title: str, content: str, attachments: list[str]): - pass + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + title = f"{node.name}" + content = node.content if node.content else "" + parent_page = int(parent_id) if parent_id is not None else None - def attach_file(self, page_id: int, attached_file: str): - pass + try: + page_id = self.confluence.publish_page( + title=title, + content=content, + parent_id=parent_page, + metadata=node.metadata + ) + return str(page_id) + except Exception as e: + logger.error("Failed to publish page %s: %s", title, str(e)) + return '' diff --git a/markdown2confluence/content_tree.py b/markdown2confluence/content_tree.py new file mode 100644 index 0000000..0dd9f42 --- /dev/null +++ b/markdown2confluence/content_tree.py @@ -0,0 +1,59 @@ +from dataclasses import dataclass, field + + +@dataclass +class ContentNode: + name: str + content: str | None = None + metadata: dict | None = None + parent: 'ContentNode | None' = None + children: dict[str, 'ContentNode'] = field(default_factory=dict) + + def add_child(self, node: 'ContentNode'): + self.children[node.name] = node + + def get_child(self, name: str) -> 'ContentNode | None': + return self.children.get(name) + + def is_leaf(self) -> bool: + return not self.children + + def __str__(self, level: int = 0) -> str: + ret = "\t" * level + repr(self.name) + "\n" + for child in self.children.values(): + ret += child.__str__(level + 1) + return ret + + +@dataclass +class ContentTree: + root: ContentNode = field(default_factory=lambda: ContentNode('root')) + + # Example usage: + # ContentTree().add_node( + # path_list=['folder1', 'folder2', 'file.md'], + # content='file content here', + # metadata={'date': '2023-04-01'} + # ) + def add_node(self, path_list: list, content: str | None = None, + metadata: dict | None = None): + current_node = self.root + for part in path_list: + next_node = current_node.get_child(part) + if not next_node: + next_node = ContentNode(name=part, parent=current_node) + current_node.add_child(next_node) + current_node = next_node + current_node.content = content + current_node.metadata = metadata + + def find_node(self, path_list: list) -> ContentNode | None: + current_node = self.root + for part in path_list: + current_node = current_node.get_child(part) + if current_node is None: + return None + return current_node + + def __str__(self) -> str: + return str(self.root) diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index eac71b4..5016eaa 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -1,8 +1,10 @@ -from publisher import Publisher -from config import Config -from util import Logger -from logo import LOGO_TEXT -import pkg_resources +from markdown2confluence.publisher import ConfluencePublisher as Publisher +from markdown2confluence.parser import MarkdownParser as Parser +from markdown2confluence.logo import LOGO_TEXT +from markdown2confluence.util import Logger +from markdown2confluence.config import Config + +import importlib.metadata config = Config() logger = Logger("main").get_logger() @@ -10,15 +12,21 @@ def logo_and_version(): print(LOGO_TEXT) - version = pkg_resources.get_distribution("markdown2confluence").version + version = importlib.metadata.version("markdown2confluence") print(f"Version: {version}\n") def main(): logo_and_version() logger.info("Started markdown2confluence") - logger.info("Publishing folder %s", config.markdown_folder) - Publisher().publish_directory(config.markdown_folder) + + directory = config.markdown_folder + + logger.info("Parsing folder %s", directory) + content = Parser().parse_directory(directory) + + logger.info("Publishing content from directory %s", directory) + Publisher().publish_content(content) if __name__ == "__main__": diff --git a/markdown2confluence/parser.py b/markdown2confluence/parser.py index d19c585..bca1d18 100644 --- a/markdown2confluence/parser.py +++ b/markdown2confluence/parser.py @@ -1,66 +1,9 @@ import os from abc import ABC, abstractmethod -from dataclasses import dataclass, field from collections.abc import Iterator - -@dataclass -class ContentNode: - name: str - content: str | None = None - metadata: dict | None = None - parent: 'ContentNode | None' = None - children: dict[str, 'ContentNode'] = field(default_factory=dict) - - def add_child(self, node: 'ContentNode'): - self.children[node.name] = node - - def get_child(self, name: str) -> 'ContentNode | None': - return self.children.get(name) - - def is_leaf(self) -> bool: - return not self.children - - def __str__(self, level: int = 0) -> str: - ret = "\t" * level + repr(self.name) + "\n" - for child in self.children.values(): - ret += child.__str__(level + 1) - return ret - - -@dataclass -class ContentTree: - root: ContentNode = field(default_factory=lambda: ContentNode('root')) - - # Example usage: - # ContentTree().add_node( - # path_list=['folder1', 'folder2', 'file.md'], - # content='file content here', - # metadata={'date': '2023-04-01'} - # ) - def add_node(self, path_list: list, content: str | None = None, - metadata: dict | None = None): - current_node = self.root - for part in path_list: - next_node = current_node.get_child(part) - if not next_node: - next_node = ContentNode(name=part, parent=current_node) - current_node.add_child(next_node) - current_node = next_node - current_node.content = content - current_node.metadata = metadata - - def find_node(self, path_list: list) -> ContentNode | None: - current_node = self.root - for part in path_list: - current_node = current_node.get_child(part) - if current_node is None: - return None - return current_node - - def __str__(self) -> str: - return str(self.root) +from markdown2confluence.content_tree import ContentTree class Parser(ABC): diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index 9827f8c..705a3e6 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -1,22 +1,48 @@ -from markdown2confluence.config import Config +from abc import ABC, abstractmethod + from markdown2confluence.util import Logger -from markdown2confluence.confluence import ConfluenceClient -from markdown2confluence.parser import MarkdownParser as Parser +from markdown2confluence.content_tree import ContentTree, ContentNode -config = Config() logger = Logger(__name__).get_logger() -class Publisher: - def __init__(self, confluence: ConfluenceClient | None = None): - self.confluence = confluence or ConfluenceClient( - confluence_config=config.confluence - ) - logger.info("Initialized Publisher") +class Publisher(ABC): + @abstractmethod + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + pass + + def pre_publish_hook(self): + """ + Optional step for actions to perform before publishing, such as + fetching/deleting previously published resources. + Can be overridden by subclasses. + """ + pass + + def post_publish_hook(self): + """ + Optional step for actions to perform after publishing, such as + cleaning up resources or performing additional logging. + Can be overridden by subclasses. + """ + pass - def publish_directory(self, directory: str): - parser = Parser() - content_tree = parser.parse_directory(directory) + def publish_content(self, content_tree: ContentTree): logger.debug("ContentTree:\n%s", content_tree) - # TODO: traverse tree and publish with ConfluenceClient.publish_page + self.pre_publish_hook() + + def traverse_and_publish( + node: ContentNode, + parent_id: str | None = None): + logger.debug("Processing node: %s", node.name) + + parent_id = self.publish_node(node, parent_id) + + for child in node.children.values(): + traverse_and_publish(child, parent_id) + + traverse_and_publish(content_tree.root) + + self.post_publish_hook() + diff --git a/markdown2confluence/util.py b/markdown2confluence/util.py index a98a7a3..3e2a91a 100644 --- a/markdown2confluence/util.py +++ b/markdown2confluence/util.py @@ -34,8 +34,6 @@ def __init__( console_handler = logging.StreamHandler() console_handler.setLevel(level) - print(logging.getLevelName(console_handler.level)) - # Create formatter and add it to the handlers formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') diff --git a/tests/unit/test_confluence_publish_content.py b/tests/unit/test_confluence_publish_content.py new file mode 100644 index 0000000..49cff9b --- /dev/null +++ b/tests/unit/test_confluence_publish_content.py @@ -0,0 +1,80 @@ +import unittest +from unittest import mock +from markdown2confluence.publisher import ConfluencePublisher +from markdown2confluence.content_tree import ContentTree, ContentNode + + +class TestConfluencePublisher(unittest.TestCase): + + @mock.patch('markdown2confluence.publisher.ConfluenceClient') + def setUp(self, MockConfluenceClient): + self.mock_confluence_client = MockConfluenceClient.return_value + self.mock_confluence_client.publish_page.return_value = 1234 + self.publisher = ConfluencePublisher(self.mock_confluence_client) + + @mock.patch('markdown2confluence.publisher.logger') + def test_publish_content_handles_exception(self, mock_logger): + # Create a minimal content tree with one node + root_node = ContentNode(name='root') + content_tree = ContentTree(root=root_node) + + # Simulate an exception when publishing a page + self.mock_confluence_client.publish_page.side_effect = Exception( + "Error") + + # Run the method + self.publisher.publish_content(content_tree) + + # Check that the error was logged + mock_logger.error.assert_called_with( + 'Failed to publish page %s: %s', 'root', 'Error') + + def test_publish_content_processes_children(self): + # Create a content tree with root and one child node + child_node = ContentNode( + name='child', + content='some content', + metadata={'dummy': 'child metadata'} + ) + root_node = ContentNode( + name='root', + children={'child': child_node}, + metadata={'dummy': 'root metadata'} + ) + content_tree = ContentTree(root=root_node) + + # Run the method + self.publisher.publish_content(content_tree) + + calls = [ + mock.call( + title='root', + content='', + parent_id=None, + metadata={'dummy': 'root metadata'}, + ), + mock.call( + title='child', + content='some content', + parent_id=1234, + metadata={'dummy': 'child metadata'}, + ) + ] + self.mock_confluence_client.publish_page.assert_has_calls( + calls, any_order=True) + + @mock.patch('markdown2confluence.publisher.logger') + def test_publish_content_logs_node_processing(self, mock_logger): + # Create a minimal content tree with one node + root_node = ContentNode(name='root') + content_tree = ContentTree(root=root_node) + + # Run the method + self.publisher.publish_content(content_tree) + + # Check that processing the node was logged + mock_logger.debug.assert_any_call('Processing node: %s', 'root') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/unit/test_publisher.py b/tests/unit/test_publisher.py index e69de29..2c89536 100644 --- a/tests/unit/test_publisher.py +++ b/tests/unit/test_publisher.py @@ -0,0 +1,85 @@ +import unittest +from unittest import mock +from markdown2confluence.publisher import Publisher +from markdown2confluence.content_tree import ContentTree, ContentNode + + +# Create a mock publisher inheriting from Publisher to test abstract methods +class MockPublisher(Publisher): + def publish_node(self, node, parent_id): + return '' + + +class TestPublisher(unittest.TestCase): + def setUp(self): + self.publisher = MockPublisher() + self.mock_publish_node = mock.patch.object( + MockPublisher, 'publish_node', autospec=True + ).start() + self.mock_publish_node.side_effect = ( + lambda __self__, node, __parent_id__: f"mock_id_for_{node.name}" + ) + self.addCleanup(mock.patch.stopall) + + def test_publish_content(self): + # Create a simple content tree + child1 = ContentNode(name='child1') + child2 = ContentNode(name='child2') + root = ContentNode(name='root', children={ + 'child1': child1, 'child2': child2}) + content_tree = ContentTree(root=root) + + # Call publish_content + self.publisher.publish_content(content_tree) + + self.assertEqual(self.mock_publish_node.call_count, 3) + self.mock_publish_node.assert_any_call(self.publisher, root, None) + self.mock_publish_node.assert_any_call( + self.publisher, child1, 'mock_id_for_root') + self.mock_publish_node.assert_any_call( + self.publisher, child2, 'mock_id_for_root') + + def test_publish_with_circular_reference(self): + # Create nodes with circular references + node_a = ContentNode(name='A') + node_b = ContentNode(name='B') + node_a.children['B'] = node_b + node_b.children['A'] = node_a + content_tree = ContentTree(root=node_a) + + with self.assertRaises(RuntimeError): + self.publisher.publish_content(content_tree) + + def test_publish_with_missing_root(self): + # Create a content tree without a root + content_tree = ContentTree(root=None) + + with self.assertRaises(AttributeError): + self.publisher.publish_content(content_tree) + + def test_publish_with_none_node(self): + # Create a content tree with a None node + node = None + content_tree = ContentTree(root=node) + + with self.assertRaises(AttributeError): + self.publisher.publish_content(content_tree) + + def test_pre_post_hooks_called(self): + self.publisher.pre_publish_hook = mock.MagicMock() + self.publisher.post_publish_hook = mock.MagicMock() + + # Create a simple content tree + root = ContentNode(name='root') + content_tree = ContentTree(root=root) + + # Call publish_content + self.publisher.publish_content(content_tree) + + self.publisher.pre_publish_hook.assert_called_once() + self.publisher.post_publish_hook.assert_called_once() + + +if __name__ == '__main__': + unittest.main() + From 2be7b955ee55603be61c28685f212de420015188 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Thu, 23 May 2024 16:02:49 +0200 Subject: [PATCH 07/19] Implement ConfluencePublisher + tests --- .gptcontext | 36 ++++- Pipfile | 1 + Pipfile.lock | 38 +++-- README.md | 1 + markdown2confluence/config.py | 10 +- markdown2confluence/confluence.py | 149 ++++++++++++++++-- markdown2confluence/content_tree.py | 23 ++- markdown2confluence/converter.py | 17 +- markdown2confluence/main.py | 2 +- markdown2confluence/publisher.py | 4 +- tests/unit/test_confluence_publish_content.py | 80 ---------- tests/unit/test_content_tree.py | 88 +++++++++++ tests/unit/test_publisher.py | 27 ++-- 13 files changed, 336 insertions(+), 140 deletions(-) delete mode 100644 tests/unit/test_confluence_publish_content.py create mode 100644 tests/unit/test_content_tree.py diff --git a/.gptcontext b/.gptcontext index 93e37f1..25862e1 100644 --- a/.gptcontext +++ b/.gptcontext @@ -19,9 +19,9 @@ markdown2confluence/ │ └── usage.md ├── LICENCE ├── markdown2confluence -│ ├── converter.py │ ├── __init_.py │ ├── main.py +│ ├── converter.py │ ├── confluence.py │ ├── config.py │ ├── content_tree.py @@ -157,12 +157,38 @@ Defines the shared data structure for content between Parser and Publisher @dataclass class ContentNode: name: str - content: [str] | None = None - metadata: [dict] | None = None - parent: ['ContentNode'] | None = None + content: str | None = None + metadata: dict | None = None + parent: 'ContentNode | None' = None children: dict[str, 'ContentNode'] = field(default_factory=dict) + def add_child(self, node: 'ContentNode'): + pass + + def get_child(self, name: str) -> 'ContentNode | None': + pass + + def is_leaf(self) -> bool: + pass + + def is_root(self) -> bool: + pass + + def __str__(self, level: int = 0) -> str: + pass + + @dataclass class ContentTree: root: ContentNode = field(default_factory=lambda: ContentNode('root')) -`` + + def add_node(self, path_list: list, content: str | None = None, + metadata: dict | None = None): + pass + + def find_node(self, path_list: list) -> ContentNode | None: + pass + + def __str__(self) -> str: + pass +``` diff --git a/Pipfile b/Pipfile index bb72659..166ed04 100644 --- a/Pipfile +++ b/Pipfile @@ -6,6 +6,7 @@ name = "pypi" [packages] atlassian-python-api = "*" markdown2confluence = {file = ".", editable = true} +markdown = "*" [dev-packages] setuptools = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 33f9c8c..c1c4b7e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "018c8b383747f100e61899863605378863f1c783d3e36b8ca54c279adcaa2de4" + "sha256": "9ac017fe09f0b3ac0d603ce54305f6e0605b31af61c2ee032dfa46e8e37cdfb3" }, "pipfile-spec": 6, "requires": { @@ -18,11 +18,11 @@ "default": { "atlassian-python-api": { "hashes": [ - "sha256:47ac76a171f08537cff64253d1b49a016dc6636dfbba324944c01397d755391c", - "sha256:e6503b2bfeedf100fcabc1d541718a8ab5e6fd757164438fcf4948e6ecea12e4" + "sha256:b77b081da3242794060f553079d93a9b26bd0aa047d86abf1ae9e7bcf59fe4e8", + "sha256:f3887c5fe0149e90d22cd0fd8d99cd6a626e74ce80c190a40515d02c4a7a1a92" ], "index": "pypi", - "version": "==3.41.11" + "version": "==3.41.13" }, "beautifulsoup4": { "hashes": [ @@ -160,6 +160,14 @@ "markers": "python_version >= '3.7'", "version": "==1.0.1" }, + "markdown": { + "hashes": [ + "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f", + "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224" + ], + "index": "pypi", + "version": "==3.6" + }, "markdown2confluence": { "editable": true, "file": "." @@ -174,11 +182,11 @@ }, "requests": { "hashes": [ - "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", - "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289", + "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c" ], - "markers": "python_version >= '3.7'", - "version": "==2.31.0" + "markers": "python_version >= '3.8'", + "version": "==2.32.2" }, "requests-oauthlib": { "hashes": [ @@ -330,12 +338,11 @@ }, "pytest": { "hashes": [ - "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233", - "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f" + "sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd", + "sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==8.2.0" + "version": "==8.2.1" }, "pytest-watch": { "hashes": [ @@ -346,12 +353,11 @@ }, "setuptools": { "hashes": [ - "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987", - "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32" + "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4", + "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==69.5.1" + "version": "==70.0.0" }, "watchdog": { "hashes": [ diff --git a/README.md b/README.md index 4368339..f7dc030 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Optionally, you can also set: To upload sample markdown files to Confluence, run the following Docker command, replacing `VAR1`, `VAR2`, etc., with your Confluence configuration values: + ```bash # Run from the root of this repo docker run --rm \ diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index 0746f60..6a566da 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -62,12 +62,14 @@ def __init__(self, args=None): def validate(self): missing_fields = [] - required_fields = ['url', 'username', 'password', 'space_id', - 'parent_page_id', 'page_title_suffix'] + required_fields = ['confluence_url', 'confluence_username', + 'confluence_password', 'confluence_space_id', + 'confluence_parent_page_id', + 'confluence_page_title_suffix'] for key in required_fields: - if not self.confluence.get(key): - missing_fields.append("confluence_{}".format(key)) + if not getattr(self, key): + missing_fields.append(key) if missing_fields: raise ValueError("The following configuration fields are " diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index 0cd3dd0..723ee39 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -1,7 +1,10 @@ from atlassian import Confluence +import requests +import hashlib from markdown2confluence.util import Logger from markdown2confluence.config import Config +from markdown2confluence.converter import Converter from markdown2confluence.publisher import Publisher from markdown2confluence.content_tree import ContentNode @@ -16,22 +19,144 @@ def __init__(self, confluence: Confluence | None = None): url=self.config.confluence_url, username=self.config.confluence_username, password=self.config.confluence_password, - cloud=True) + cloud=True + ) + self.suffix = self.config.confluence_page_title_suffix + self.label = self.config.confluence_page_label + self.autogen_notice = ( + "" + "Do not make changes here" + "" + "

This page is autogenerated. Make changes in the " + f"GitHub repository

" + "
" + "
" + ) + logger.info("Initialized Publisher") + def pre_publish_hook(self): + cql = ( + f"space='{self.config.confluence_space_id}' " + f"AND label='{self.label}' " + f"AND title~'{self.suffix}'" + ) + self.stale_pages: list[dict[str, any]] = self.confluence.cql( + cql, start=0, limit=None).get('results', []) + logger.info("Fetched %d stale pages", len(self.stale_pages)) + + def post_publish_hook(self): + logger.debug(f"Found {len(self.stale_pages)} remaining stale pages") + for page in self.stale_pages: + page_id = page['content']['id'] + title = page['content']['title'] + labels = page.get('metadata', {}).get('labels', []) + + if not title.endswith(self.suffix): + logger.warning("Skipping deletion of unmanaged page %s", title) + continue + + if self.label not in labels: + logger.warning("Skipping deletion of page %s missing label %s", + title, self.label) + continue + + self.confluence.remove_page(page_id) + logger.info("Deleted unmanaged page %s", title) + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: - title = f"{node.name}" - content = node.content if node.content else "" - parent_page = int(parent_id) if parent_id is not None else None + identifier = f"{node.name}{node.parent.name or ''}" + hash = hashlib.md5(identifier.encode('utf-8')).hexdigest()[:3] + + title = f"{node.name} #{hash} {self.suffix}" + content = Converter.convert_markdown_to_html(node.content or "") + content = self.autogen_notice + content + parent_page = ( + int(parent_id) if parent_id is not None + else self.config.confluence_parent_page_id + ) + + page_id = self._get_existing_page_id(title) + if page_id: + logger.debug( + f"Found existing page: {page_id} matching title {title}") + self._update_page(page_id, title, content, parent_page, node) + else: + logger.debug( + f"Found no existing page for title {title}") + page_id = self._create_page(title, content, parent_page, node) + + self._attach_files(page_id, node) + return str(page_id) + def _get_existing_page_id(self, title: str) -> str | None: + for page in self.stale_pages: + if page['content']['title'] == title: + self.stale_pages.remove(page) + return page['content']['id'] + return None + + def _create_page(self, title: str, content: str, parent_id: int | None, + node: ContentNode) -> str: + logger.debug(f"creating page {title} with parent id {parent_id}") + try: + page = self.confluence.create_page( + space=self.config.confluence_space_id, + title=title, + body=content, + parent_id=parent_id, + type='page', + representation='storage', + editor='v2', + full_width=False + ) + page_id = str(page['id']) + self.confluence.set_page_label(page_id, self.label) + logger.info("Created page %s with ID %s", title, page_id) + return page_id + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + logger.error("Error creating page: %s", e.response.text) + raise ValueError( + "Failed to create page due to bad request.") from e + else: + logger.error("HTTP error occurred: %s", e.response.text) + raise + + def _update_page(self, page_id: str, title: str, content: str, + parent_id: int | None, node: ContentNode): + logger.debug(f"updating page {node.name} with parent {page_id}") try: - page_id = self.confluence.publish_page( + self.confluence.update_page( + page_id=page_id, title=title, - content=content, - parent_id=parent_page, - metadata=node.metadata + body=content, + parent_id=parent_id, + type='page', + representation='storage', + minor_edit=False, + full_width=False ) - return str(page_id) - except Exception as e: - logger.error("Failed to publish page %s: %s", title, str(e)) - return '' + self.confluence.set_page_label(page_id, self.label) + logger.info("Updated page %s with ID %s and label %s", + title, page_id, self.label) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + logger.error("Error updating page: %s", e.response.text) + raise ValueError( + "Failed to update page due to bad request.") from e + else: + logger.error("HTTP error occurred: %s", e.response.text) + raise + + def _attach_files(self, page_id: str, node: ContentNode): + if not node.metadata: + return + + for file in node.metadata.get('attachments', []): + self.confluence.attach_file( + filename=file, + page_id=page_id + ) + logger.info("Attached file %s to page ID %s", file, page_id) + diff --git a/markdown2confluence/content_tree.py b/markdown2confluence/content_tree.py index 0dd9f42..2a7b7ed 100644 --- a/markdown2confluence/content_tree.py +++ b/markdown2confluence/content_tree.py @@ -10,6 +10,7 @@ class ContentNode: children: dict[str, 'ContentNode'] = field(default_factory=dict) def add_child(self, node: 'ContentNode'): + node.parent = self self.children[node.name] = node def get_child(self, name: str) -> 'ContentNode | None': @@ -18,6 +19,9 @@ def get_child(self, name: str) -> 'ContentNode | None': def is_leaf(self) -> bool: return not self.children + def is_root(self) -> bool: + return self.parent is None + def __str__(self, level: int = 0) -> str: ret = "\t" * level + repr(self.name) + "\n" for child in self.children.values(): @@ -29,27 +33,31 @@ def __str__(self, level: int = 0) -> str: class ContentTree: root: ContentNode = field(default_factory=lambda: ContentNode('root')) - # Example usage: - # ContentTree().add_node( - # path_list=['folder1', 'folder2', 'file.md'], - # content='file content here', - # metadata={'date': '2023-04-01'} - # ) def add_node(self, path_list: list, content: str | None = None, metadata: dict | None = None): + if not path_list: + raise ValueError("Path list cannot be empty.") current_node = self.root for part in path_list: + if not part: + raise ValueError("Path components must be non-empty strings.") next_node = current_node.get_child(part) if not next_node: - next_node = ContentNode(name=part, parent=current_node) + next_node = ContentNode(name=part) current_node.add_child(next_node) current_node = next_node + if current_node is self.root: + raise ValueError("Cannot add content to the root node.") current_node.content = content current_node.metadata = metadata def find_node(self, path_list: list) -> ContentNode | None: + if not path_list: + raise ValueError("Path list cannot be empty.") current_node = self.root for part in path_list: + if not part: + raise ValueError("Path components must be non-empty strings.") current_node = current_node.get_child(part) if current_node is None: return None @@ -57,3 +65,4 @@ def find_node(self, path_list: list) -> ContentNode | None: def __str__(self) -> str: return str(self.root) + diff --git a/markdown2confluence/converter.py b/markdown2confluence/converter.py index c7612ed..9513b01 100644 --- a/markdown2confluence/converter.py +++ b/markdown2confluence/converter.py @@ -1,4 +1,8 @@ import markdown +from markdown.extensions.codehilite import CodeHiliteExtension +from markdown.extensions.extra import ExtraExtension +from markdown.extensions.meta import MetaExtension +from markdown.extensions.toc import TocExtension class Converter: @@ -8,7 +12,8 @@ def __init__(self): """Initialize the converter.""" pass - def convert_markdown_to_html(self, markdown_content): + @staticmethod + def convert_markdown_to_html(markdown_content): """Convert Markdown content to HTML. Args: @@ -17,5 +22,13 @@ def convert_markdown_to_html(self, markdown_content): Returns: str: HTML content generated from the Markdown. """ - html_content = markdown.markdown(markdown_content) + extensions = [ + CodeHiliteExtension(linenums=False, guess_lang=False), + ExtraExtension(), + MetaExtension(), + TocExtension(permalink=True) + ] + html_content = markdown.markdown( + markdown_content, extensions=extensions) return html_content + diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index 5016eaa..a1f4e60 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -1,4 +1,4 @@ -from markdown2confluence.publisher import ConfluencePublisher as Publisher +from markdown2confluence.confluence import ConfluencePublisher as Publisher from markdown2confluence.parser import MarkdownParser as Parser from markdown2confluence.logo import LOGO_TEXT from markdown2confluence.util import Logger diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index 705a3e6..ffc7f6f 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -37,7 +37,8 @@ def traverse_and_publish( parent_id: str | None = None): logger.debug("Processing node: %s", node.name) - parent_id = self.publish_node(node, parent_id) + if not node.is_root(): + parent_id = self.publish_node(node, parent_id) for child in node.children.values(): traverse_and_publish(child, parent_id) @@ -45,4 +46,3 @@ def traverse_and_publish( traverse_and_publish(content_tree.root) self.post_publish_hook() - diff --git a/tests/unit/test_confluence_publish_content.py b/tests/unit/test_confluence_publish_content.py deleted file mode 100644 index 49cff9b..0000000 --- a/tests/unit/test_confluence_publish_content.py +++ /dev/null @@ -1,80 +0,0 @@ -import unittest -from unittest import mock -from markdown2confluence.publisher import ConfluencePublisher -from markdown2confluence.content_tree import ContentTree, ContentNode - - -class TestConfluencePublisher(unittest.TestCase): - - @mock.patch('markdown2confluence.publisher.ConfluenceClient') - def setUp(self, MockConfluenceClient): - self.mock_confluence_client = MockConfluenceClient.return_value - self.mock_confluence_client.publish_page.return_value = 1234 - self.publisher = ConfluencePublisher(self.mock_confluence_client) - - @mock.patch('markdown2confluence.publisher.logger') - def test_publish_content_handles_exception(self, mock_logger): - # Create a minimal content tree with one node - root_node = ContentNode(name='root') - content_tree = ContentTree(root=root_node) - - # Simulate an exception when publishing a page - self.mock_confluence_client.publish_page.side_effect = Exception( - "Error") - - # Run the method - self.publisher.publish_content(content_tree) - - # Check that the error was logged - mock_logger.error.assert_called_with( - 'Failed to publish page %s: %s', 'root', 'Error') - - def test_publish_content_processes_children(self): - # Create a content tree with root and one child node - child_node = ContentNode( - name='child', - content='some content', - metadata={'dummy': 'child metadata'} - ) - root_node = ContentNode( - name='root', - children={'child': child_node}, - metadata={'dummy': 'root metadata'} - ) - content_tree = ContentTree(root=root_node) - - # Run the method - self.publisher.publish_content(content_tree) - - calls = [ - mock.call( - title='root', - content='', - parent_id=None, - metadata={'dummy': 'root metadata'}, - ), - mock.call( - title='child', - content='some content', - parent_id=1234, - metadata={'dummy': 'child metadata'}, - ) - ] - self.mock_confluence_client.publish_page.assert_has_calls( - calls, any_order=True) - - @mock.patch('markdown2confluence.publisher.logger') - def test_publish_content_logs_node_processing(self, mock_logger): - # Create a minimal content tree with one node - root_node = ContentNode(name='root') - content_tree = ContentTree(root=root_node) - - # Run the method - self.publisher.publish_content(content_tree) - - # Check that processing the node was logged - mock_logger.debug.assert_any_call('Processing node: %s', 'root') - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unit/test_content_tree.py b/tests/unit/test_content_tree.py new file mode 100644 index 0000000..f164fd8 --- /dev/null +++ b/tests/unit/test_content_tree.py @@ -0,0 +1,88 @@ +import unittest +from markdown2confluence.content_tree import ContentNode, ContentTree + + +class TestContentNode(unittest.TestCase): + def setUp(self): + self.root = ContentNode(name='root') + self.child1 = ContentNode(name='child1', parent=self.root) + self.child2 = ContentNode(name='child2', parent=self.root) + self.root.add_child(self.child1) + self.root.add_child(self.child2) + + def test_add_child(self): + self.assertIn('child1', self.root.children) + self.assertIn('child2', self.root.children) + + def test_get_child(self): + self.assertEqual(self.root.get_child('child1'), self.child1) + self.assertIsNone(self.root.get_child('nonexistent')) + + def test_is_leaf(self): + self.assertFalse(self.root.is_leaf()) + self.assertTrue(self.child1.is_leaf()) + + def test_is_root(self): + self.assertTrue(self.root.is_root()) + self.assertFalse(self.child1.is_root()) + + def test_str(self): + expected = "'root'\n\t'child1'\n\t'child2'\n" + self.assertEqual(str(self.root), expected) + + +class TestContentTree(unittest.TestCase): + def setUp(self): + self.tree = ContentTree() + + def test_add_node(self): + self.tree.add_node(['level1', 'level2'], content='test content') + node = self.tree.find_node(['level1', 'level2']) + self.assertIsNotNone(node) + self.assertEqual(node.content, 'test content') + + def test_find_node(self): + self.tree.add_node(['level1', 'level2a']) + self.tree.add_node(['level1', 'level2b']) + node_a = self.tree.find_node(['level1', 'level2a']) + node_b = self.tree.find_node(['level1', 'level2b']) + self.assertIsNotNone(node_a) + self.assertIsNotNone(node_b) + self.assertIsNone(self.tree.find_node(['nonexistent'])) + + def test_tree_str(self): + self.tree.add_node(['level1', 'level2'], content='test content') + expected = "'root'\n\t'level1'\n\t\t'level2'\n" + self.assertEqual(str(self.tree), expected) + + def test_add_node_nested(self): + self.tree.add_node(['level1', 'level2', 'level3'], content='nested') + node = self.tree.find_node(['level1', 'level2', 'level3']) + self.assertIsNotNone(node) + self.assertEqual(node.content, 'nested') + + def test_add_node_empty_parent(self): + with self.assertRaises(ValueError): + self.tree.add_node(['', 'level1']) + + def test_find_node_invalid(self): + self.assertIsNone(self.tree.find_node(['level1', 'nonexistent'])) + + def test_multiple_children(self): + self.tree.add_node(['level1', 'child1']) + self.tree.add_node(['level1', 'child2']) + node1 = self.tree.find_node(['level1', 'child1']) + node2 = self.tree.find_node(['level1', 'child2']) + self.assertIsNotNone(node1) + self.assertIsNotNone(node2) + + def test_add_same_node_twice(self): + self.tree.add_node(['level1', 'level2'], content='first') + self.tree.add_node(['level1', 'level2'], content='second') + node = self.tree.find_node(['level1', 'level2']) + self.assertEqual(node.content, 'second') + + +if __name__ == '__main__': + unittest.main() + diff --git a/tests/unit/test_publisher.py b/tests/unit/test_publisher.py index 2c89536..2cc846a 100644 --- a/tests/unit/test_publisher.py +++ b/tests/unit/test_publisher.py @@ -21,30 +21,37 @@ def setUp(self): ) self.addCleanup(mock.patch.stopall) - def test_publish_content(self): + def test_publish_nested(self): # Create a simple content tree + root = ContentNode(name='root') child1 = ContentNode(name='child1') child2 = ContentNode(name='child2') - root = ContentNode(name='root', children={ - 'child1': child1, 'child2': child2}) + child3 = ContentNode(name='child3') + root.add_child(child1) + root.add_child(child2) + child2.add_child(child3) content_tree = ContentTree(root=root) # Call publish_content self.publisher.publish_content(content_tree) self.assertEqual(self.mock_publish_node.call_count, 3) - self.mock_publish_node.assert_any_call(self.publisher, root, None) self.mock_publish_node.assert_any_call( - self.publisher, child1, 'mock_id_for_root') + self.publisher, child1, None + ) + self.mock_publish_node.assert_any_call( + self.publisher, child2, None + ) self.mock_publish_node.assert_any_call( - self.publisher, child2, 'mock_id_for_root') + self.publisher, child3, 'mock_id_for_child2' + ) def test_publish_with_circular_reference(self): # Create nodes with circular references node_a = ContentNode(name='A') node_b = ContentNode(name='B') - node_a.children['B'] = node_b - node_b.children['A'] = node_a + node_a.add_child(node_b) + node_b.add_child(node_a) content_tree = ContentTree(root=node_a) with self.assertRaises(RuntimeError): @@ -59,8 +66,7 @@ def test_publish_with_missing_root(self): def test_publish_with_none_node(self): # Create a content tree with a None node - node = None - content_tree = ContentTree(root=node) + content_tree = ContentTree(root=None) with self.assertRaises(AttributeError): self.publisher.publish_content(content_tree) @@ -82,4 +88,3 @@ def test_pre_post_hooks_called(self): if __name__ == '__main__': unittest.main() - From 49670829b55ba42af121220fe87f70af01553e51 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Thu, 23 May 2024 16:06:20 +0200 Subject: [PATCH 08/19] Update __init__.py --- markdown2confluence/__init_.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/markdown2confluence/__init_.py b/markdown2confluence/__init_.py index 9fd9deb..9359f50 100644 --- a/markdown2confluence/__init_.py +++ b/markdown2confluence/__init_.py @@ -6,5 +6,8 @@ from .main import main from .converter import Converter from .publisher import Publisher +from .confluence import ConfluencePublisher +from .parser import Parser, MarkdownParser from .config import Config from .util import Logger +from .content_tree import ContentTree, ContentNode From 5e036b83d84c39be79bbf2e8050884f857e159a7 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Fri, 31 May 2024 13:43:55 +0200 Subject: [PATCH 09/19] Update Dockerfile, change setup.py to pyproject.toml, set default loglevel info --- .dockerignore | 10 +++++ Dockerfile | 12 +++--- Pipfile | 1 - Pipfile.lock | 75 +++++++++++++++++------------------ markdown2confluence/config.py | 1 - markdown2confluence/main.py | 9 ++--- markdown2confluence/util.py | 2 +- pyproject.toml | 22 ++++++++++ requirements.txt | 3 +- setup.py | 15 ------- 10 files changed, 82 insertions(+), 68 deletions(-) create mode 100644 .dockerignore create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..afd9ca5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +__pycache__ +*.pyc +.env +venv +.venv +.vscode +.idea +.git +.github + diff --git a/Dockerfile b/Dockerfile index ca7e19c..a93902e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,12 +3,16 @@ FROM python:3.11-slim WORKDIR /app COPY requirements.txt /app/ - RUN pip install --no-cache-dir -r requirements.txt +COPY . /app/ + +# Install the current package +RUN pip install . + ENV CONFLUENCE_USERNAME="" ENV CONFLUENCE_PASSWORD="" -ENV CONFLUENCE_URL="https://yourdomain.atlassian.net/wiki/rest/api/" +ENV CONFLUENCE_URL="https://yourdomain.atlassian.net/wiki/" ENV CONFLUENCE_SPACE_ID="yourspace" ENV CONFLUENCE_PARENT_PAGE_ID="12345" ENV CONFLUENCE_PAGE_TITLE_SUFFIX="(autogenerated)" @@ -16,6 +20,4 @@ ENV CONFLUENCE_PAGE_LABEL="markdown2confluence" ENV MARKDOWN_FOLDER="./" ENV MARKDOWN_SOURCE_REF="" -COPY ./markdown2confluence /app - -CMD ["python", "/app/main.py"] +CMD ["python", "markdown2confluence/main.py"] diff --git a/Pipfile b/Pipfile index 166ed04..b3932d7 100644 --- a/Pipfile +++ b/Pipfile @@ -5,7 +5,6 @@ name = "pypi" [packages] atlassian-python-api = "*" -markdown2confluence = {file = ".", editable = true} markdown = "*" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index c1c4b7e..61b5bed 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9ac017fe09f0b3ac0d603ce54305f6e0605b31af61c2ee032dfa46e8e37cdfb3" + "sha256": "93a578c32fda5610b20068fede2f54e00397853170219c4019e59743cb50e67e" }, "pipfile-spec": 6, "requires": { @@ -168,10 +168,6 @@ "index": "pypi", "version": "==3.6" }, - "markdown2confluence": { - "editable": true, - "file": "." - }, "oauthlib": { "hashes": [ "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", @@ -182,11 +178,11 @@ }, "requests": { "hashes": [ - "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289", - "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c" + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" ], "markers": "python_version >= '3.8'", - "version": "==2.32.2" + "version": "==2.32.3" }, "requests-oauthlib": { "hashes": [ @@ -361,38 +357,41 @@ }, "watchdog": { "hashes": [ - "sha256:11e12fafb13372e18ca1bbf12d50f593e7280646687463dd47730fd4f4d5d257", - "sha256:2895bf0518361a9728773083908801a376743bcc37dfa252b801af8fd281b1ca", - "sha256:39cb34b1f1afbf23e9562501673e7146777efe95da24fab5707b88f7fb11649b", - "sha256:45cc09cc4c3b43fb10b59ef4d07318d9a3ecdbff03abd2e36e77b6dd9f9a5c85", - "sha256:4986db5e8880b0e6b7cd52ba36255d4793bf5cdc95bd6264806c233173b1ec0b", - "sha256:5369136a6474678e02426bd984466343924d1df8e2fd94a9b443cb7e3aa20d19", - "sha256:557ba04c816d23ce98a06e70af6abaa0485f6d94994ec78a42b05d1c03dcbd50", - "sha256:6a4db54edea37d1058b08947c789a2354ee02972ed5d1e0dca9b0b820f4c7f92", - "sha256:6a80d5cae8c265842c7419c560b9961561556c4361b297b4c431903f8c33b269", - "sha256:6a9c71a0b02985b4b0b6d14b875a6c86ddea2fdbebd0c9a720a806a8bbffc69f", - "sha256:6c47bdd680009b11c9ac382163e05ca43baf4127954c5f6d0250e7d772d2b80c", - "sha256:6e949a8a94186bced05b6508faa61b7adacc911115664ccb1923b9ad1f1ccf7b", - "sha256:73c7a935e62033bd5e8f0da33a4dcb763da2361921a69a5a95aaf6c93aa03a87", - "sha256:76ad8484379695f3fe46228962017a7e1337e9acadafed67eb20aabb175df98b", - "sha256:8350d4055505412a426b6ad8c521bc7d367d1637a762c70fdd93a3a0d595990b", - "sha256:87e9df830022488e235dd601478c15ad73a0389628588ba0b028cb74eb72fed8", - "sha256:8f9a542c979df62098ae9c58b19e03ad3df1c9d8c6895d96c0d51da17b243b1c", - "sha256:8fec441f5adcf81dd240a5fe78e3d83767999771630b5ddfc5867827a34fa3d3", - "sha256:9a03e16e55465177d416699331b0f3564138f1807ecc5f2de9d55d8f188d08c7", - "sha256:ba30a896166f0fee83183cec913298151b73164160d965af2e93a20bbd2ab605", - "sha256:c17d98799f32e3f55f181f19dd2021d762eb38fdd381b4a748b9f5a36738e935", - "sha256:c522392acc5e962bcac3b22b9592493ffd06d1fc5d755954e6be9f4990de932b", - "sha256:d0f9bd1fd919134d459d8abf954f63886745f4660ef66480b9d753a7c9d40927", - "sha256:d18d7f18a47de6863cd480734613502904611730f8def45fc52a5d97503e5101", - "sha256:d31481ccf4694a8416b681544c23bd271f5a123162ab603c7d7d2dd7dd901a07", - "sha256:e3e7065cbdabe6183ab82199d7a4f6b3ba0a438c5a512a68559846ccb76a78ec", - "sha256:eed82cdf79cd7f0232e2fdc1ad05b06a5e102a43e331f7d041e5f0e0a34a51c4", - "sha256:f970663fa4f7e80401a7b0cbeec00fa801bf0287d93d48368fc3e6fa32716245", - "sha256:f9b2fdca47dc855516b2d66eef3c39f2672cbf7e7a42e7e67ad2cbfcd6ba107d" + "sha256:0144c0ea9997b92615af1d94afc0c217e07ce2c14912c7b1a5731776329fcfc7", + "sha256:03e70d2df2258fb6cb0e95bbdbe06c16e608af94a3ffbd2b90c3f1e83eb10767", + "sha256:093b23e6906a8b97051191a4a0c73a77ecc958121d42346274c6af6520dec175", + "sha256:123587af84260c991dc5f62a6e7ef3d1c57dfddc99faacee508c71d287248459", + "sha256:17e32f147d8bf9657e0922c0940bcde863b894cd871dbb694beb6704cfbd2fb5", + "sha256:206afc3d964f9a233e6ad34618ec60b9837d0582b500b63687e34011e15bb429", + "sha256:4107ac5ab936a63952dea2a46a734a23230aa2f6f9db1291bf171dac3ebd53c6", + "sha256:4513ec234c68b14d4161440e07f995f231be21a09329051e67a2118a7a612d2d", + "sha256:611be3904f9843f0529c35a3ff3fd617449463cb4b73b1633950b3d97fa4bfb7", + "sha256:62c613ad689ddcb11707f030e722fa929f322ef7e4f18f5335d2b73c61a85c28", + "sha256:667f3c579e813fcbad1b784db7a1aaa96524bed53437e119f6a2f5de4db04235", + "sha256:6e8c70d2cd745daec2a08734d9f63092b793ad97612470a0ee4cbb8f5f705c57", + "sha256:7577b3c43e5909623149f76b099ac49a1a01ca4e167d1785c76eb52fa585745a", + "sha256:998d2be6976a0ee3a81fb8e2777900c28641fb5bfbd0c84717d89bca0addcdc5", + "sha256:a3c2c317a8fb53e5b3d25790553796105501a235343f5d2bf23bb8649c2c8709", + "sha256:ab998f567ebdf6b1da7dc1e5accfaa7c6992244629c0fdaef062f43249bd8dee", + "sha256:ac7041b385f04c047fcc2951dc001671dee1b7e0615cde772e84b01fbf68ee84", + "sha256:bca36be5707e81b9e6ce3208d92d95540d4ca244c006b61511753583c81c70dd", + "sha256:c9904904b6564d4ee8a1ed820db76185a3c96e05560c776c79a6ce5ab71888ba", + "sha256:cad0bbd66cd59fc474b4a4376bc5ac3fc698723510cbb64091c2a793b18654db", + "sha256:d10a681c9a1d5a77e75c48a3b8e1a9f2ae2928eda463e8d33660437705659682", + "sha256:d4925e4bf7b9bddd1c3de13c9b8a2cdb89a468f640e66fbfabaf735bd85b3e35", + "sha256:d7b9f5f3299e8dd230880b6c55504a1f69cf1e4316275d1b215ebdd8187ec88d", + "sha256:da2dfdaa8006eb6a71051795856bedd97e5b03e57da96f98e375682c48850645", + "sha256:dddba7ca1c807045323b6af4ff80f5ddc4d654c8bce8317dde1bd96b128ed253", + "sha256:e7921319fe4430b11278d924ef66d4daa469fafb1da679a2e48c935fa27af193", + "sha256:e93f451f2dfa433d97765ca2634628b789b49ba8b504fdde5837cdcf25fdb53b", + "sha256:eebaacf674fa25511e8867028d281e602ee6500045b57f43b08778082f7f8b44", + "sha256:ef0107bbb6a55f5be727cfc2ef945d5676b97bffb8425650dadbb184be9f9a2b", + "sha256:f0de0f284248ab40188f23380b03b59126d1479cd59940f2a34f8852db710625", + "sha256:f27279d060e2ab24c0aa98363ff906d2386aa6c4dc2f1a374655d4e02a6c5e5e", + "sha256:f8affdf3c0f0466e69f5b3917cdd042f89c8c63aebdb9f7c078996f607cdb0f5" ], "markers": "python_version >= '3.8'", - "version": "==4.0.0" + "version": "==4.0.1" } } } diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index 6a566da..1ac6604 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -57,7 +57,6 @@ def __init__(self, args=None): ) self.validate() - logger.debug("initialized logger with config:", self) def validate(self): missing_fields = [] diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index a1f4e60..77779f6 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -6,20 +6,19 @@ import importlib.metadata -config = Config() logger = Logger("main").get_logger() +version = importlib.metadata.version("markdown2confluence") def logo_and_version(): - print(LOGO_TEXT) - version = importlib.metadata.version("markdown2confluence") - print(f"Version: {version}\n") + logger.info(LOGO_TEXT) def main(): logo_and_version() - logger.info("Started markdown2confluence") + logger.info(f"Started markdown2confluence version: {version}") + config = Config() directory = config.markdown_folder logger.info("Parsing folder %s", directory) diff --git a/markdown2confluence/util.py b/markdown2confluence/util.py index 3e2a91a..fed5b5f 100644 --- a/markdown2confluence/util.py +++ b/markdown2confluence/util.py @@ -8,7 +8,7 @@ def __init__( self, name: str, log_file: str = 'markdown2confluence.log', - level: int = logging.DEBUG if os.getenv("DEBUG") else logging.ERROR + level: int = logging.DEBUG if os.getenv("DEBUG") else logging.INFO ): """ Initialize the Logger with a specified name and log file. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..570bfdd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,22 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "markdown2confluence" +version = "0.2.0-rc.5" +description = "A tool to publish markdown to Confluence pages" +readme = "README.md" +license = {text = "MIT"} +authors = [ + { name="Otto Lote", email="otto.lote@innofactor.com" } +] + +dependencies = [ + # List your dependencies here + # "requests>=2.23.0", + # "beautifulsoup4>=4.9.0" +] + +[project.scripts] +markdown2confluence = "markdown2confluence.main:main_function" diff --git a/requirements.txt b/requirements.txt index 426def3..843be60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,9 +7,8 @@ deprecated==1.2.14 ; python_version >= '2.7' and python_version not in '3.0, 3.1 idna==3.7 ; python_version >= '3.5' jmespath==1.0.1 ; python_version >= '3.7' markdown==3.6 --e file:///home/otto/dev/gh/innofactororg/markdown2confluence oauthlib==3.2.2 ; python_version >= '3.6' -requests==2.32.2 ; python_version >= '3.8' +requests==2.32.3 ; python_version >= '3.8' requests-oauthlib==2.0.0 ; python_version >= '3.4' six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' soupsieve==2.5 ; python_version >= '3.8' diff --git a/setup.py b/setup.py deleted file mode 100644 index 4bdfbdd..0000000 --- a/setup.py +++ /dev/null @@ -1,15 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='markdown2confluence', - version='0.2.0-rc.5', - packages=find_packages(), - install_requires=[ - # dependencies - ], - entry_points={ - 'console_scripts': [ - 'markdown2confluence = markdown2confluence.main:main_function', - ], - }, -) From b44701f80e292cf01881237ff80d65be33e5c1e0 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Fri, 31 May 2024 13:56:36 +0200 Subject: [PATCH 10/19] Add github actions pipeline for tests + badge --- .github/workflows/test.yaml | 41 +++++++++++++++++++++++++++++++++++++ README.md | 4 ++++ 2 files changed, 45 insertions(+) create mode 100644 .github/workflows/test.yaml diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..e8cea04 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,41 @@ +name: tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + pip install coverage + pip install . + + - name: Run tests + run: coverage run -m pytest + + - name: Make coverage report + run: coverage lcov + + - name: Comment coverage report on PR + if: ${{ github.event_name == 'pull_request' }} + uses: romeovs/lcov-reporter-action@v0.3.1 + with: + lcov-file: coverage.lcov + delete-old-comments: true diff --git a/README.md b/README.md index f7dc030..2100609 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # markdown2confluence +[![Tests](https://github.com/innofactororg/markdown2confluence/actions/workflows/test.yaml/badge.svg)](https://github.com/innofactororg/markdown2confluence/actions/workflows/test.yaml) +![Python](https://img.shields.io/badge/python-3.12-blue.svg) +[![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/) + Convert your Markdown files into Confluence pages with ease using the `markdown2confluence` script. It uploads all files from a specified Markdown directory to a Confluence space, applying a specific page label and maintaining the folder hierarchy as page structure. ## Prerequisites From c1068e351b1c24fd223a567628ee23e0ec55db24 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Tue, 4 Jun 2024 11:59:51 +0200 Subject: [PATCH 11/19] Add confluence_root_page config option --- markdown2confluence/config.py | 18 +++++++++++----- markdown2confluence/confluence.py | 9 +------- markdown2confluence/content_tree.py | 8 ++++++- markdown2confluence/publisher.py | 15 ++++++++++++- tests/unit/test_publisher.py | 33 ++++++++++++++++++++++++++++- 5 files changed, 67 insertions(+), 16 deletions(-) diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index 1ac6604..fdf54a1 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -41,6 +41,10 @@ def __init__(self, args=None): os.environ.get('CONFLUENCE_PAGE_LABEL') or 'markdown2confluence' ) + self.confluence_root_page = ( + args.confluence_root_page or + os.environ.get('CONFLUENCE_ROOT_PAGE') + ) self.markdown_folder = ( args.markdown_folder or @@ -98,7 +102,15 @@ def parse_args(): help="Confluence space key") parser.add_argument( '--confluence-parent-page-id', - help="Parent page ID under which to add the new page") + help="Parent page ID under which to add all top-level pages") + parser.add_argument( + '--confluence-page-label', + help=("Label to assign to Confluence pages managed by " + "markdown2confluence")) + parser.add_argument( + '--confluence-root-page', + help=("Add a top-level page under which all pages will be organized.") + ) parser.add_argument( '--markdown-folder', help="File or folder containing Markdown files to publish") @@ -112,9 +124,5 @@ def parse_args(): '--confluence-page-title-suffix', help="Suffix for Confluence page titles, to denote pages " "managed by markdown2confluence") - parser.add_argument( - '--confluence-page-label', - help=("Label to assign to Confluence pages managed by " - "markdown2confluence")) return parser.parse_args() diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index 723ee39..e75c3e3 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -50,22 +50,16 @@ def post_publish_hook(self): for page in self.stale_pages: page_id = page['content']['id'] title = page['content']['title'] - labels = page.get('metadata', {}).get('labels', []) if not title.endswith(self.suffix): logger.warning("Skipping deletion of unmanaged page %s", title) continue - if self.label not in labels: - logger.warning("Skipping deletion of page %s missing label %s", - title, self.label) - continue - self.confluence.remove_page(page_id) logger.info("Deleted unmanaged page %s", title) def publish_node(self, node: ContentNode, parent_id: str | None) -> str: - identifier = f"{node.name}{node.parent.name or ''}" + identifier = f"{node.name}{node.parent.name if node.parent else None}{self.config.confluence_root_page}" hash = hashlib.md5(identifier.encode('utf-8')).hexdigest()[:3] title = f"{node.name} #{hash} {self.suffix}" @@ -159,4 +153,3 @@ def _attach_files(self, page_id: str, node: ContentNode): page_id=page_id ) logger.info("Attached file %s to page ID %s", file, page_id) - diff --git a/markdown2confluence/content_tree.py b/markdown2confluence/content_tree.py index 2a7b7ed..b4d3a96 100644 --- a/markdown2confluence/content_tree.py +++ b/markdown2confluence/content_tree.py @@ -22,6 +22,10 @@ def is_leaf(self) -> bool: def is_root(self) -> bool: return self.parent is None + def _set_name(self, name: str): + self.name = name + return + def __str__(self, level: int = 0) -> str: ret = "\t" * level + repr(self.name) + "\n" for child in self.children.values(): @@ -63,6 +67,8 @@ def find_node(self, path_list: list) -> ContentNode | None: return None return current_node + def rename_root(self, name: str): + self.root._set_name(name) + def __str__(self) -> str: return str(self.root) - diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index ffc7f6f..4c4e31c 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -1,12 +1,16 @@ from abc import ABC, abstractmethod from markdown2confluence.util import Logger +from markdown2confluence.config import Config from markdown2confluence.content_tree import ContentTree, ContentNode logger = Logger(__name__).get_logger() class Publisher(ABC): + def __init__(self, config: Config | None = None): + self.config = config if config is not None else Config() + @abstractmethod def publish_node(self, node: ContentNode, parent_id: str | None) -> str: pass @@ -30,6 +34,10 @@ def post_publish_hook(self): def publish_content(self, content_tree: ContentTree): logger.debug("ContentTree:\n%s", content_tree) + root_page = self.config.confluence_root_page + if root_page: + content_tree.rename_root(root_page) + self.pre_publish_hook() def traverse_and_publish( @@ -37,7 +45,12 @@ def traverse_and_publish( parent_id: str | None = None): logger.debug("Processing node: %s", node.name) - if not node.is_root(): + if node.is_root(): + root_page = self.config.confluence_root_page + if root_page not in (None, ''): + parent_id = self.publish_node( + node, self.config.confluence_parent_page_id) + else: parent_id = self.publish_node(node, parent_id) for child in node.children.values(): diff --git a/tests/unit/test_publisher.py b/tests/unit/test_publisher.py index 2cc846a..05d513b 100644 --- a/tests/unit/test_publisher.py +++ b/tests/unit/test_publisher.py @@ -1,6 +1,7 @@ import unittest from unittest import mock from markdown2confluence.publisher import Publisher +from markdown2confluence.config import Config from markdown2confluence.content_tree import ContentTree, ContentNode @@ -12,7 +13,10 @@ def publish_node(self, node, parent_id): class TestPublisher(unittest.TestCase): def setUp(self): - self.publisher = MockPublisher() + self.publisher = MockPublisher(config=Config.__new__(Config)) + self.publisher.config.confluence_root_page = None + self.publisher.config.confluence_parent_page_id = 'mock_parent_id' + self.mock_publish_node = mock.patch.object( MockPublisher, 'publish_node', autospec=True ).start() @@ -22,6 +26,8 @@ def setUp(self): self.addCleanup(mock.patch.stopall) def test_publish_nested(self): + self.publisher.config.confluence_root_page = '' + # Create a simple content tree root = ContentNode(name='root') child1 = ContentNode(name='child1') @@ -46,6 +52,31 @@ def test_publish_nested(self): self.publisher, child3, 'mock_id_for_child2' ) + def test_publish_root_page(self): + self.publisher.config.confluence_root_page = 'rootpagename' + + # Create a simple content tree + root = ContentNode(name='root') + child1 = ContentNode(name='child1') + child2 = ContentNode(name='child2') + root.add_child(child1) + root.add_child(child2) + content_tree = ContentTree(root=root) + + # Call publish_content + self.publisher.publish_content(content_tree) + + self.assertEqual(self.mock_publish_node.call_count, 3) + self.mock_publish_node.assert_any_call( + self.publisher, root, 'mock_parent_id' + ) + self.mock_publish_node.assert_any_call( + self.publisher, child1, 'mock_id_for_rootpagename' + ) + self.mock_publish_node.assert_any_call( + self.publisher, child2, 'mock_id_for_rootpagename' + ) + def test_publish_with_circular_reference(self): # Create nodes with circular references node_a = ContentNode(name='A') From 46e2780b4202f041306e4e0144f2c258b46416c5 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Tue, 4 Jun 2024 14:16:15 +0200 Subject: [PATCH 12/19] Add attachments to content tree metadata and fix regex --- markdown2confluence/parser.py | 27 +++++++- tests/unit/test_parser.py | 124 ++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/markdown2confluence/parser.py b/markdown2confluence/parser.py index bca1d18..9269acc 100644 --- a/markdown2confluence/parser.py +++ b/markdown2confluence/parser.py @@ -1,10 +1,14 @@ import os +import re from abc import ABC, abstractmethod from collections.abc import Iterator +from markdown2confluence.util import Logger from markdown2confluence.content_tree import ContentTree +logger = Logger(__name__).get_logger() + class Parser(ABC): @abstractmethod @@ -19,7 +23,13 @@ def parse_directory(self, directory: str) -> ContentTree: for file_path in self._get_markdown_files(directory): content = self._read_file_content(file_path) path_list = self._get_relative_path_as_list(file_path, directory) - content_tree.add_node(path_list=path_list, content=content) + attachments = self._get_media_references(content) + + content_tree.add_node( + path_list=path_list, + content=content, + metadata={'attachments': attachments} + ) return content_tree def _get_markdown_files(self, directory: str) -> Iterator[str]: @@ -34,6 +44,21 @@ def _read_file_content(self, file_path: str) -> str: with open(file_path, 'r', encoding='utf-8') as md_file: return md_file.read() + def _get_media_references(self, markdown: str) -> list[str]: + files_to_upload = [] + + for line in markdown.splitlines(): + match = re.search( + r"!\[.*?\]\((?!http)(.*?\.(?:jpg|jpeg|png|gif|bmp|svg|webp|tiff))\)", # noqa E501 + line + ) + if match: + file_path = match.group(1) + logger.debug(f"Found file for attaching: {file_path}") + files_to_upload.append(file_path) + + return files_to_upload + def _get_relative_path_as_list( self, file_path: str, base_directory: str) -> list[str]: return os.path.relpath(file_path, base_directory).split(os.sep) diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 4c13b87..7d17cfd 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -73,6 +73,130 @@ def test_parse_directory_with_multiple_markdown_files( tree.root.children['test2.md'].content, "Content of /dir_with_multiple_md/test2.md") + def test_get_media_references_no_media(self): + markdown = """ + # Title + This is a test markdown without any media links. + """ + result = self.parser._get_media_references(markdown) + self.assertEqual(result, []) + + def test_get_media_references_with_local_media(self): + markdown = """ + # Title + ![Alt text](image1.png) + Some text. + ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['image1.png', 'folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_mixed_links(self): + markdown = """ + # Title + ![Alt text](image1.png) + Some text. + ![Alt text](http://example.com/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['image1.png'] + self.assertEqual(result, expected) + + def test_get_media_references_with_relative_links(self): + markdown = """ + # Title + ![Alt text](./relative/image1.png) + Some text. + ![Alt text](../parent/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['./relative/image1.png', '../parent/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_nested_links(self): + markdown = """ + # Title + ![Alt text](folder/subfolder/image1.png) + Some text. + ![Alt text](folder/subfolder/deep/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/subfolder/image1.png', + 'folder/subfolder/deep/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_empty_url(self): + markdown = """ + # Title + ![Alt text]() + Some text. + ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_broken_markdown(self): + markdown = """ + # Title + ![Alt text](image1.png + Some text. + ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_html_image_tag(self): + markdown = """ + # Title + Alt text + Some text. + ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_special_chars(self): + markdown = """ + # Title + ![Alt text](image_1.png) + Some text. + ![Alt text](folder/image-2.jpg) + ![Alt text](folder/image.3.gif) + ![Alt text](folder/image@4.bmp) + ![Alt text](folder/image#5.jpeg) + ![Alt text](folder/image$6.png) + ![Alt text](folder/image&7.jpg) + ![Alt text](folder/image(8).gif) + ![Alt text](folder/image)9.bmp) + ![Alt text](folder/image+10.jpeg) + ![Alt text](folder/image,11.png) + ![Alt text](folder/image;12.jpg) + ![Alt text](folder/image=13.gif) + ![Alt text](folder/image[14].bmp) + ![Alt text](folder/image]15.jpeg) + ![Alt text](folder/image{16}.png) + ![Alt text](folder/image}17.jpg) + ![Alt text](folder/image~18.gif) + ![Alt text](folder/image!19.bmp) + ![Alt text](folder/image%20.jpeg) + """ + result = self.parser._get_media_references(markdown) + expected = [ + 'image_1.png', 'folder/image-2.jpg', 'folder/image.3.gif', + 'folder/image@4.bmp', 'folder/image#5.jpeg', 'folder/image$6.png', + 'folder/image&7.jpg', 'folder/image(8).gif', 'folder/image)9.bmp', + 'folder/image+10.jpeg', 'folder/image,11.png', 'folder/image;12.jpg', + 'folder/image=13.gif', 'folder/image[14].bmp', 'folder/image]15.jpeg', + 'folder/image{16}.png', 'folder/image}17.jpg', 'folder/image~18.gif', + 'folder/image!19.bmp', 'folder/image%20.jpeg' + ] + self.assertEqual(result, expected) + if __name__ == '__main__': unittest.main() + From d500fd3596e6aefdee4a7d20268fc60312bf5cec Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Tue, 4 Jun 2024 15:45:05 +0200 Subject: [PATCH 13/19] Add parser tests --- tests/unit/test_parser.py | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 7d17cfd..a3c2cf1 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -196,7 +196,47 @@ def test_get_media_references_with_special_chars(self): ] self.assertEqual(result, expected) + def test_get_relative_path_as_list(self): + base_directory = '/home/user/project' + file_path = '/home/user/project/docs/file.md' + expected = ['docs', 'file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + file_path = '/home/user/project/docs/subdir/file.md' + expected = ['docs', 'subdir', 'file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + file_path = '/home/user/project/file.md' + expected = ['file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + file_path = '/home/user/another_project/file.md' + expected = ['..', 'another_project', 'file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + @mock.patch('os.path.exists') + @mock.patch('builtins.open', new_callable=mock.mock_open, read_data='test content') + def test_read_file_content_success(self, mock_open, mock_exists): + mock_exists.return_value = True + result = self.parser._read_file_content('/path/to/file.md') + mock_open.assert_called_once_with( + '/path/to/file.md', 'r', encoding='utf-8') + self.assertEqual(result, 'test content') + + @mock.patch('os.path.exists') + def test_read_file_content_file_not_found(self, mock_exists): + mock_exists.return_value = False + with self.assertRaises(FileNotFoundError): + self.parser._read_file_content('/path/to/nonexistent.md') + if __name__ == '__main__': unittest.main() - From d83adc7d6a46cdbed6b2dcfa6998f6f22e1ec9a8 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Tue, 4 Jun 2024 15:51:44 +0200 Subject: [PATCH 14/19] Remove unnecessary test invocation code --- tests/unit/test_content_tree.py | 5 ----- tests/unit/test_parser.py | 4 ---- tests/unit/test_publisher.py | 4 ---- 3 files changed, 13 deletions(-) diff --git a/tests/unit/test_content_tree.py b/tests/unit/test_content_tree.py index f164fd8..c660c5b 100644 --- a/tests/unit/test_content_tree.py +++ b/tests/unit/test_content_tree.py @@ -81,8 +81,3 @@ def test_add_same_node_twice(self): self.tree.add_node(['level1', 'level2'], content='second') node = self.tree.find_node(['level1', 'level2']) self.assertEqual(node.content, 'second') - - -if __name__ == '__main__': - unittest.main() - diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index a3c2cf1..3ea0df0 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -236,7 +236,3 @@ def test_read_file_content_file_not_found(self, mock_exists): mock_exists.return_value = False with self.assertRaises(FileNotFoundError): self.parser._read_file_content('/path/to/nonexistent.md') - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unit/test_publisher.py b/tests/unit/test_publisher.py index 05d513b..18a2e75 100644 --- a/tests/unit/test_publisher.py +++ b/tests/unit/test_publisher.py @@ -115,7 +115,3 @@ def test_pre_post_hooks_called(self): self.publisher.pre_publish_hook.assert_called_once() self.publisher.post_publish_hook.assert_called_once() - - -if __name__ == '__main__': - unittest.main() From fcd4a0e48401c2ad862d851ac5ff2141a275e8b8 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Thu, 5 Sep 2024 14:15:42 +0200 Subject: [PATCH 15/19] Update dependencies --- Pipfile.lock | 124 +++++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 61b5bed..6c4dcea 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -18,11 +18,11 @@ "default": { "atlassian-python-api": { "hashes": [ - "sha256:b77b081da3242794060f553079d93a9b26bd0aa047d86abf1ae9e7bcf59fe4e8", - "sha256:f3887c5fe0149e90d22cd0fd8d99cd6a626e74ce80c190a40515d02c4a7a1a92" + "sha256:1c271ca9b1688acdaef09ad6f763570868a381394530d1fba49b5b104fffe54a", + "sha256:3c852f38ad8645887fbfe1526c12f2c1951ba06a24a1bbb36bdf7ccdc6d7b1ac" ], "index": "pypi", - "version": "==3.41.13" + "version": "==3.41.15" }, "beautifulsoup4": { "hashes": [ @@ -34,11 +34,11 @@ }, "certifi": { "hashes": [ - "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", - "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" + "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", + "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9" ], "markers": "python_version >= '3.6'", - "version": "==2024.2.2" + "version": "==2024.8.30" }, "charset-normalizer": { "hashes": [ @@ -146,11 +146,11 @@ }, "idna": { "hashes": [ - "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", - "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" + "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac", + "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603" ], - "markers": "python_version >= '3.5'", - "version": "==3.7" + "markers": "python_version >= '3.6'", + "version": "==3.8" }, "jmespath": { "hashes": [ @@ -162,11 +162,11 @@ }, "markdown": { "hashes": [ - "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f", - "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224" + "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2", + "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803" ], "index": "pypi", - "version": "==3.6" + "version": "==3.7" }, "oauthlib": { "hashes": [ @@ -202,19 +202,19 @@ }, "soupsieve": { "hashes": [ - "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", - "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7" + "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", + "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9" ], "markers": "python_version >= '3.8'", - "version": "==2.5" + "version": "==2.6" }, "urllib3": { "hashes": [ - "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", - "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" ], "markers": "python_version >= '3.8'", - "version": "==2.2.1" + "version": "==2.2.2" }, "wrapt": { "hashes": [ @@ -318,11 +318,11 @@ }, "packaging": { "hashes": [ - "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", - "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" ], - "markers": "python_version >= '3.7'", - "version": "==24.0" + "markers": "python_version >= '3.8'", + "version": "==24.1" }, "pluggy": { "hashes": [ @@ -334,11 +334,11 @@ }, "pytest": { "hashes": [ - "sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd", - "sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1" + "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", + "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce" ], "index": "pypi", - "version": "==8.2.1" + "version": "==8.3.2" }, "pytest-watch": { "hashes": [ @@ -349,49 +349,47 @@ }, "setuptools": { "hashes": [ - "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4", - "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0" + "sha256:5f4c08aa4d3ebcb57a50c33b1b07e94315d7fc7230f7115e47fc99776c8ce308", + "sha256:95b40ed940a1c67eb70fc099094bd6e99c6ee7c23aa2306f4d2697ba7916f9c6" ], "index": "pypi", - "version": "==70.0.0" + "version": "==74.1.2" }, "watchdog": { "hashes": [ - "sha256:0144c0ea9997b92615af1d94afc0c217e07ce2c14912c7b1a5731776329fcfc7", - "sha256:03e70d2df2258fb6cb0e95bbdbe06c16e608af94a3ffbd2b90c3f1e83eb10767", - "sha256:093b23e6906a8b97051191a4a0c73a77ecc958121d42346274c6af6520dec175", - "sha256:123587af84260c991dc5f62a6e7ef3d1c57dfddc99faacee508c71d287248459", - "sha256:17e32f147d8bf9657e0922c0940bcde863b894cd871dbb694beb6704cfbd2fb5", - "sha256:206afc3d964f9a233e6ad34618ec60b9837d0582b500b63687e34011e15bb429", - "sha256:4107ac5ab936a63952dea2a46a734a23230aa2f6f9db1291bf171dac3ebd53c6", - "sha256:4513ec234c68b14d4161440e07f995f231be21a09329051e67a2118a7a612d2d", - "sha256:611be3904f9843f0529c35a3ff3fd617449463cb4b73b1633950b3d97fa4bfb7", - "sha256:62c613ad689ddcb11707f030e722fa929f322ef7e4f18f5335d2b73c61a85c28", - "sha256:667f3c579e813fcbad1b784db7a1aaa96524bed53437e119f6a2f5de4db04235", - "sha256:6e8c70d2cd745daec2a08734d9f63092b793ad97612470a0ee4cbb8f5f705c57", - "sha256:7577b3c43e5909623149f76b099ac49a1a01ca4e167d1785c76eb52fa585745a", - "sha256:998d2be6976a0ee3a81fb8e2777900c28641fb5bfbd0c84717d89bca0addcdc5", - "sha256:a3c2c317a8fb53e5b3d25790553796105501a235343f5d2bf23bb8649c2c8709", - "sha256:ab998f567ebdf6b1da7dc1e5accfaa7c6992244629c0fdaef062f43249bd8dee", - "sha256:ac7041b385f04c047fcc2951dc001671dee1b7e0615cde772e84b01fbf68ee84", - "sha256:bca36be5707e81b9e6ce3208d92d95540d4ca244c006b61511753583c81c70dd", - "sha256:c9904904b6564d4ee8a1ed820db76185a3c96e05560c776c79a6ce5ab71888ba", - "sha256:cad0bbd66cd59fc474b4a4376bc5ac3fc698723510cbb64091c2a793b18654db", - "sha256:d10a681c9a1d5a77e75c48a3b8e1a9f2ae2928eda463e8d33660437705659682", - "sha256:d4925e4bf7b9bddd1c3de13c9b8a2cdb89a468f640e66fbfabaf735bd85b3e35", - "sha256:d7b9f5f3299e8dd230880b6c55504a1f69cf1e4316275d1b215ebdd8187ec88d", - "sha256:da2dfdaa8006eb6a71051795856bedd97e5b03e57da96f98e375682c48850645", - "sha256:dddba7ca1c807045323b6af4ff80f5ddc4d654c8bce8317dde1bd96b128ed253", - "sha256:e7921319fe4430b11278d924ef66d4daa469fafb1da679a2e48c935fa27af193", - "sha256:e93f451f2dfa433d97765ca2634628b789b49ba8b504fdde5837cdcf25fdb53b", - "sha256:eebaacf674fa25511e8867028d281e602ee6500045b57f43b08778082f7f8b44", - "sha256:ef0107bbb6a55f5be727cfc2ef945d5676b97bffb8425650dadbb184be9f9a2b", - "sha256:f0de0f284248ab40188f23380b03b59126d1479cd59940f2a34f8852db710625", - "sha256:f27279d060e2ab24c0aa98363ff906d2386aa6c4dc2f1a374655d4e02a6c5e5e", - "sha256:f8affdf3c0f0466e69f5b3917cdd042f89c8c63aebdb9f7c078996f607cdb0f5" + "sha256:14dd4ed023d79d1f670aa659f449bcd2733c33a35c8ffd88689d9d243885198b", + "sha256:29e4a2607bd407d9552c502d38b45a05ec26a8e40cc7e94db9bb48f861fa5abc", + "sha256:3960136b2b619510569b90f0cd96408591d6c251a75c97690f4553ca88889769", + "sha256:3e8d5ff39f0a9968952cce548e8e08f849141a4fcc1290b1c17c032ba697b9d7", + "sha256:53ed1bf71fcb8475dd0ef4912ab139c294c87b903724b6f4a8bd98e026862e6d", + "sha256:5597c051587f8757798216f2485e85eac583c3b343e9aa09127a3a6f82c65ee8", + "sha256:638bcca3d5b1885c6ec47be67bf712b00a9ab3d4b22ec0881f4889ad870bc7e8", + "sha256:6bec703ad90b35a848e05e1b40bf0050da7ca28ead7ac4be724ae5ac2653a1a0", + "sha256:726eef8f8c634ac6584f86c9c53353a010d9f311f6c15a034f3800a7a891d941", + "sha256:72990192cb63872c47d5e5fefe230a401b87fd59d257ee577d61c9e5564c62e5", + "sha256:7d1aa7e4bb0f0c65a1a91ba37c10e19dabf7eaaa282c5787e51371f090748f4b", + "sha256:8c47150aa12f775e22efff1eee9f0f6beee542a7aa1a985c271b1997d340184f", + "sha256:901ee48c23f70193d1a7bc2d9ee297df66081dd5f46f0ca011be4f70dec80dab", + "sha256:963f7c4c91e3f51c998eeff1b3fb24a52a8a34da4f956e470f4b068bb47b78ee", + "sha256:9814adb768c23727a27792c77812cf4e2fd9853cd280eafa2bcfa62a99e8bd6e", + "sha256:aa9cd6e24126d4afb3752a3e70fce39f92d0e1a58a236ddf6ee823ff7dba28ee", + "sha256:b6dc8f1d770a8280997e4beae7b9a75a33b268c59e033e72c8a10990097e5fde", + "sha256:b84bff0391ad4abe25c2740c7aec0e3de316fdf7764007f41e248422a7760a7f", + "sha256:ba32efcccfe2c58f4d01115440d1672b4eb26cdd6fc5b5818f1fb41f7c3e1889", + "sha256:bda40c57115684d0216556671875e008279dea2dc00fcd3dde126ac8e0d7a2fb", + "sha256:c4a440f725f3b99133de610bfec93d570b13826f89616377715b9cd60424db6e", + "sha256:d010be060c996db725fbce7e3ef14687cdcc76f4ca0e4339a68cc4532c382a73", + "sha256:d2ab34adc9bf1489452965cdb16a924e97d4452fcf88a50b21859068b50b5c3b", + "sha256:d7594a6d32cda2b49df3fd9abf9b37c8d2f3eab5df45c24056b4a671ac661619", + "sha256:d961f4123bb3c447d9fcdcb67e1530c366f10ab3a0c7d1c0c9943050936d4877", + "sha256:dae7a1879918f6544201d33666909b040a46421054a50e0f773e0d870ed7438d", + "sha256:dcebf7e475001d2cdeb020be630dc5b687e9acdd60d16fea6bb4508e7b94cf76", + "sha256:f627c5bf5759fdd90195b0c0431f99cff4867d212a67b384442c51136a098ed7", + "sha256:f8b2918c19e0d48f5f20df458c84692e2a054f02d9df25e6c3c930063eca64c1", + "sha256:fb223456db6e5f7bd9bbd5cd969f05aae82ae21acc00643b60d81c770abd402b" ], - "markers": "python_version >= '3.8'", - "version": "==4.0.1" + "markers": "python_version >= '3.9'", + "version": "==5.0.2" } } } From db8020b99965714f2ada9179ee00bd6d0897a9d5 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Thu, 5 Sep 2024 14:17:04 +0200 Subject: [PATCH 16/19] Fix edge-cases with attachments, improve attachment logic/interface --- markdown2confluence/confluence.py | 21 +++++++++++++-------- markdown2confluence/parser.py | 8 +++++++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index e75c3e3..af7e9ee 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -80,7 +80,8 @@ def publish_node(self, node: ContentNode, parent_id: str | None) -> str: f"Found no existing page for title {title}") page_id = self._create_page(title, content, parent_page, node) - self._attach_files(page_id, node) + if node.metadata: + self._attach_files(page_id, node.metadata.get('attachments', [])) return str(page_id) def _get_existing_page_id(self, title: str) -> str | None: @@ -143,13 +144,17 @@ def _update_page(self, page_id: str, title: str, content: str, logger.error("HTTP error occurred: %s", e.response.text) raise - def _attach_files(self, page_id: str, node: ContentNode): - if not node.metadata: - return + def _attach_files(self, page_id: str, attachments: list[dict]): + for attachment in attachments: + logger.debug(attachment) + + name = attachment['reference'] + filename = attachment['file_path'] - for file in node.metadata.get('attachments', []): self.confluence.attach_file( - filename=file, - page_id=page_id + filename=filename, + name=name, + page_id=page_id, ) - logger.info("Attached file %s to page ID %s", file, page_id) + logger.info("Attached file %s with reference %s to page ID %s", + filename, name, page_id) diff --git a/markdown2confluence/parser.py b/markdown2confluence/parser.py index 9269acc..099614d 100644 --- a/markdown2confluence/parser.py +++ b/markdown2confluence/parser.py @@ -23,7 +23,13 @@ def parse_directory(self, directory: str) -> ContentTree: for file_path in self._get_markdown_files(directory): content = self._read_file_content(file_path) path_list = self._get_relative_path_as_list(file_path, directory) - attachments = self._get_media_references(content) + attachments = [] + for ref in self._get_media_references(content): + file_path = os.path.join(directory, ref) + attachments.append({ + "reference": ref, + "file_path": file_path, + }) content_tree.add_node( path_list=path_list, From 8dcd8e0eec09d37e3037bbe1c2151cbac4ec61c8 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Fri, 6 Sep 2024 14:08:12 +0200 Subject: [PATCH 17/19] WIP MinimalConfluence client [skip ci] --- markdown2confluence/api.py | 69 +++++++++++++++++++++++++++ tests/unit/test_api.py | 95 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 markdown2confluence/api.py create mode 100644 tests/unit/test_api.py diff --git a/markdown2confluence/api.py b/markdown2confluence/api.py new file mode 100644 index 0000000..fe74e47 --- /dev/null +++ b/markdown2confluence/api.py @@ -0,0 +1,69 @@ +import requests +from requests.auth import HTTPBasicAuth +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + +class MinimalConfluence: + def __init__(self, url: str, username: str, password: str): + self.url = url if url.endswith('/') else url + '/' + self.api = requests.Session() + retries = Retry( + total=5, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504] + ) + self.api.mount('https://', HTTPAdapter(max_retries=retries)) + + if username and password: + self.api.auth = HTTPBasicAuth(username, password) + else: + raise ValueError( + 'Both username and password (api token) must be set.') + + def _request(self, method: str, path: str, **kwargs): + url = f'{self.url}{path}' + response = self.api.request(method, url, **kwargs) + response.raise_for_status() + return response.json() + + def _get(self, path: str, **kwargs): + return self._request('GET', path, **kwargs) + + def _post(self, path: str, **kwargs): + return self._request('POST', path, **kwargs) + + def _put(self, path: str, **kwargs): + return self._request('PUT', path, **kwargs) + + def search(self, cql: str | None): + # Add logic to perform a CQL search in Confluence + pass + + def create_page(self, space: str, title: str, body: str, + parent_id: str | None): + # Add logic to create a page in Confluence + pass + + def update_page(self, page_id: str, title: str, body: str, + version: int | None): + # Add logic to update a page in Confluence + pass + + def remove_page(self, page_id: str | None): + # Add logic to remove a page in Confluence + pass + + def create_attachment(self, page_id: str, file_path: str, + comment: str | None): + # Add logic to create an attachment in Confluence + pass + + def get_attachments(self, page_id: str | None): + # Add logic to get attachments from a page in Confluence + pass + + def update_attachment(self, attachment_id: str, file_path: str, + comment: str | None): + # Add logic to update an attachment in Confluence + pass diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py new file mode 100644 index 0000000..ece121f --- /dev/null +++ b/tests/unit/test_api.py @@ -0,0 +1,95 @@ +import unittest +from unittest.mock import patch, MagicMock +from markdown2confluence.api import MinimalConfluence +from requests.exceptions import HTTPError + + +class TestMinimalConfluence(unittest.TestCase): + + def setUp(self): + self.url = 'https://confluence.example.com' + self.username = 'user' + self.password = 'pass' + self.confluence = MinimalConfluence(self.url, self.username, self.password) + + @patch('markdown2confluence.api.requests.Session.request') + def test_request(self, mock_request): + mock_request.return_value.status_code = 200 + mock_request.return_value.json.return_value = {'key': 'value'} + self.assertEqual( + self.confluence._request('GET', 'path'), + {'key': 'value'} + ) + + mock_request.return_value.raise_for_status.side_effect = HTTPError + with self.assertRaises(HTTPError): + self.confluence._request('GET', 'path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_get(self, mock_request): + self.confluence._get('path') + mock_request.assert_called_with('GET', f'{self.url}/path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_post(self, mock_request): + self.confluence._post('path') + mock_request.assert_called_with('POST', f'{self.url}/path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_put(self, mock_request): + self.confluence._put('path') + mock_request.assert_called_with('PUT', f'{self.url}/path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_search(self, mock_request): + # Since search is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.search('cql') + + @patch('markdown2confluence.api.requests.Session.request') + def test_create_page(self, mock_request): + # Since create_page is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.create_page( + space='SPACE', title='Title', body='Body', parent_id=None + ) + + @patch('markdown2confluence.api.requests.Session.request') + def test_update_page(self, mock_request): + # Since update_page is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.update_page(page_id='123', title='Title', body='Body', + version=None) + + @patch('markdown2confluence.api.requests.Session.request') + def test_remove_page(self, mock_request): + # Since remove_page is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.remove_page(page_id=None) + + @patch('markdown2confluence.api.requests.Session.request') + def test_create_attachment(self, mock_request): + # Since create_attachment is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.create_attachment( + page_id='123', file_path='file.txt', comment=None + ) + + @patch('markdown2confluence.api.requests.Session.request') + def test_get_attachments(self, mock_request): + # Since get_attachments is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.get_attachments(page_id=None) + + @patch('markdown2confluence.api.requests.Session.request') + def test_update_attachment(self, mock_request): + # Since update_attachment is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.update_attachment( + attachment_id='123', file_path='file.txt', comment=None + ) + + +if __name__ == '__main__': + unittest.main() + From 5f51658c29a11fcf4d6cc4cdc63bfcc95d6a130d Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Wed, 9 Oct 2024 15:00:50 +0200 Subject: [PATCH 18/19] WIP [skip ci] --- markdown2confluence/api.py | 87 +++++++++++++++++++++++-------- markdown2confluence/confluence.py | 36 +++++++------ 2 files changed, 84 insertions(+), 39 deletions(-) diff --git a/markdown2confluence/api.py b/markdown2confluence/api.py index fe74e47..22d0070 100644 --- a/markdown2confluence/api.py +++ b/markdown2confluence/api.py @@ -36,34 +36,77 @@ def _post(self, path: str, **kwargs): def _put(self, path: str, **kwargs): return self._request('PUT', path, **kwargs) - def search(self, cql: str | None): - # Add logic to perform a CQL search in Confluence - pass + def search(self, cql: str): + path = 'rest/api/content/search' + params = {'cql': cql} + return self._get(path, params=params) + + def get_page_by_id(self, page_id: str): + path = f'rest/api/content/{page_id}' + return self._get(path) def create_page(self, space: str, title: str, body: str, - parent_id: str | None): - # Add logic to create a page in Confluence - pass + parent_id: str): + path = 'rest/api/content' + data = { + 'type': 'page', + 'title': title, + 'space': {'key': space}, + 'body': { + 'storage': { + 'value': body, + 'representation': 'storage' + } + } + } + if parent_id: + data['ancestors'] = [{'id': parent_id}] + return self._post(path, json=data) - def update_page(self, page_id: str, title: str, body: str, - version: int | None): - # Add logic to update a page in Confluence - pass + def update_page(self, page_id: str, title: str, parent_id: int, + body: str, version: int): + path = f'rest/api/content/{page_id}' + print("version is: ", version, type(version)) + data = { + 'id': page_id, + 'status': 'current', + 'title': title, + 'type': 'page', + 'parentId': parent_id, + 'version': {'number': version}, + 'body': { + 'storage': { + 'value': body, + 'representation': 'storage' + } + } + } + return self._put(path, json=data) - def remove_page(self, page_id: str | None): - # Add logic to remove a page in Confluence - pass + def remove_page(self, page_id: str): + path = f'rest/api/content/{page_id}' + return self._request('DELETE', path) def create_attachment(self, page_id: str, file_path: str, - comment: str | None): - # Add logic to create an attachment in Confluence - pass + comment: str): + path = f'rest/api/content/{page_id}/child/attachment' + files = {'file': open(file_path, 'rb')} + params = {'comment': comment} if comment else {} + return self._post(path, files=files, params=params) - def get_attachments(self, page_id: str | None): - # Add logic to get attachments from a page in Confluence - pass + def get_attachments(self, page_id: str): + path = f'rest/api/content/{page_id}/child/attachment' + return self._get(path) def update_attachment(self, attachment_id: str, file_path: str, - comment: str | None): - # Add logic to update an attachment in Confluence - pass + comment: str): + path = f'rest/api/content/{attachment_id}/data' + files = {'file': open(file_path, 'rb')} + params = {'comment': comment} if comment else {} + return self._post(path, files=files, params=params) + + # TODO: validate + def set_page_label(self, page_id: str, label: str): + path = f'rest/api/content/{page_id}/label' + data = {'prefix': 'global', 'name': label} + return self._post(path, json=data) diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index af7e9ee..5861825 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -1,4 +1,4 @@ -from atlassian import Confluence +from markdown2confluence.api import MinimalConfluence as Confluence import requests import hashlib @@ -19,7 +19,6 @@ def __init__(self, confluence: Confluence | None = None): url=self.config.confluence_url, username=self.config.confluence_username, password=self.config.confluence_password, - cloud=True ) self.suffix = self.config.confluence_page_title_suffix self.label = self.config.confluence_page_label @@ -41,15 +40,16 @@ def pre_publish_hook(self): f"AND label='{self.label}' " f"AND title~'{self.suffix}'" ) - self.stale_pages: list[dict[str, any]] = self.confluence.cql( - cql, start=0, limit=None).get('results', []) + self.stale_pages: list[dict[str, any]] = self.confluence.search( + cql).get('results', []) logger.info("Fetched %d stale pages", len(self.stale_pages)) + logger.debug("Stale pages: %s", self.stale_pages) def post_publish_hook(self): logger.debug(f"Found {len(self.stale_pages)} remaining stale pages") for page in self.stale_pages: - page_id = page['content']['id'] - title = page['content']['title'] + page_id = page['id'] + title = page['title'] if not title.endswith(self.suffix): logger.warning("Skipping deletion of unmanaged page %s", title) @@ -70,11 +70,12 @@ def publish_node(self, node: ContentNode, parent_id: str | None) -> str: else self.config.confluence_parent_page_id ) - page_id = self._get_existing_page_id(title) - if page_id: + page = self._get_existing_page(title) + if page and page['id']: logger.debug( - f"Found existing page: {page_id} matching title {title}") - self._update_page(page_id, title, content, parent_page, node) + f"Found existing page: {page['id']} matching title {title}") + self._update_page(page['id'], title, content, + parent_page, node) else: logger.debug( f"Found no existing page for title {title}") @@ -84,11 +85,11 @@ def publish_node(self, node: ContentNode, parent_id: str | None) -> str: self._attach_files(page_id, node.metadata.get('attachments', [])) return str(page_id) - def _get_existing_page_id(self, title: str) -> str | None: + def _get_existing_page(self, title: str) -> dict | None: for page in self.stale_pages: - if page['content']['title'] == title: + if page['title'] == title: self.stale_pages.remove(page) - return page['content']['id'] + return page return None def _create_page(self, title: str, content: str, parent_id: int | None, @@ -122,15 +123,16 @@ def _update_page(self, page_id: str, title: str, content: str, parent_id: int | None, node: ContentNode): logger.debug(f"updating page {node.name} with parent {page_id}") try: + page = self.confluence.get_page_by_id(page_id) + version = int(page['version']['number'] + + 1) if 'version' in page else 1 + print("page: ", page, "version: ", version) self.confluence.update_page( page_id=page_id, title=title, body=content, parent_id=parent_id, - type='page', - representation='storage', - minor_edit=False, - full_width=False + version=version, ) self.confluence.set_page_label(page_id, self.label) logger.info("Updated page %s with ID %s and label %s", From cabda10de0c93a89437b83feb5dadda35594cd34 Mon Sep 17 00:00:00 2001 From: Otto Lote Date: Thu, 17 Oct 2024 11:26:14 +0200 Subject: [PATCH 19/19] WIP [skip ci] --- markdown2confluence/api.py | 55 +++++++++++++++++++++++-------- markdown2confluence/config.py | 33 ++++++++++++------- markdown2confluence/confluence.py | 47 +++++++++++++++----------- 3 files changed, 90 insertions(+), 45 deletions(-) diff --git a/markdown2confluence/api.py b/markdown2confluence/api.py index 22d0070..7cad1f1 100644 --- a/markdown2confluence/api.py +++ b/markdown2confluence/api.py @@ -25,6 +25,10 @@ def _request(self, method: str, path: str, **kwargs): url = f'{self.url}{path}' response = self.api.request(method, url, **kwargs) response.raise_for_status() + + if response.status_code == 204: # No Content + return None + return response.json() def _get(self, path: str, **kwargs): @@ -36,6 +40,31 @@ def _post(self, path: str, **kwargs): def _put(self, path: str, **kwargs): return self._request('PUT', path, **kwargs) + def _del(self, path: str, **kwargs): + return self._request('DELETE', path, **kwargs) + + def get_space_id_from_key(self, space_key: str) -> str: + path = 'api/v2/spaces' + response = self._get(path) + if response is None: + raise ValueError('Failed to retrieve spaces.') + + for space in response.get('results', []): + if space.get('key') == space_key: + return space.get('id') + raise ValueError(f'Space key {space_key} not found.') + + def get_space_key_from_id(self, space_id: str) -> str: + path = 'api/v2/spaces' + response = self._get(path) + if response is None: + raise ValueError('Failed to retrieve spaces.') + + for space in response.get('results', []): + if space.get('id') == space_id: + return space.get('key') + raise ValueError(f'Space ID {space_id} not found.') + def search(self, cql: str): path = 'rest/api/content/search' params = {'cql': cql} @@ -46,12 +75,12 @@ def get_page_by_id(self, page_id: str): return self._get(path) def create_page(self, space: str, title: str, body: str, - parent_id: str): - path = 'rest/api/content' + parent_id: int | None): + path = 'api/v2/pages' data = { - 'type': 'page', + 'spaceId': '294916', # space, + 'status': 'current', 'title': title, - 'space': {'key': space}, 'body': { 'storage': { 'value': body, @@ -60,13 +89,12 @@ def create_page(self, space: str, title: str, body: str, } } if parent_id: - data['ancestors'] = [{'id': parent_id}] + data['parentId'] = parent_id return self._post(path, json=data) - def update_page(self, page_id: str, title: str, parent_id: int, + def update_page(self, page_id: str, title: str, parent_id: int | None, body: str, version: int): path = f'rest/api/content/{page_id}' - print("version is: ", version, type(version)) data = { 'id': page_id, 'status': 'current', @@ -84,18 +112,18 @@ def update_page(self, page_id: str, title: str, parent_id: int, return self._put(path, json=data) def remove_page(self, page_id: str): - path = f'rest/api/content/{page_id}' - return self._request('DELETE', path) + path = f'api/v2/pages/{page_id}' + return self._del(path) - def create_attachment(self, page_id: str, file_path: str, - comment: str): + def create_or_update_attachment(self, page_id: str, file_path: str, + comment: str | None = None): path = f'rest/api/content/{page_id}/child/attachment' files = {'file': open(file_path, 'rb')} params = {'comment': comment} if comment else {} - return self._post(path, files=files, params=params) + return self._put(path, files=files, params=params) def get_attachments(self, page_id: str): - path = f'rest/api/content/{page_id}/child/attachment' + path = f'api/v2/pages/{page_id}/attachments' return self._get(path) def update_attachment(self, attachment_id: str, file_path: str, @@ -105,7 +133,6 @@ def update_attachment(self, attachment_id: str, file_path: str, params = {'comment': comment} if comment else {} return self._post(path, files=files, params=params) - # TODO: validate def set_page_label(self, page_id: str, label: str): path = f'rest/api/content/{page_id}/label' data = {'prefix': 'global', 'name': label} diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index fdf54a1..ceb2aee 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -15,47 +15,51 @@ def __init__(self, args=None): args.confluence_url or os.environ.get('CONFLUENCE_URL', '') ).rstrip('/') - self.confluence_username = ( + self.confluence_username = str( args.confluence_username or os.environ.get('CONFLUENCE_USERNAME') ) - self.confluence_password = ( + self.confluence_password = str( args.confluence_password or os.environ.get('CONFLUENCE_PASSWORD') ) - self.confluence_space_id = ( + self.confluence_space_id = str( args.confluence_space_id or os.environ.get('CONFLUENCE_SPACE_ID') ) + self.confluence_space_key = str( + args.confluence_space_key or + os.environ.get('CONFLUENCE_SPACE_KEY') + ) self.confluence_parent_page_id = ( args.confluence_parent_page_id or os.environ.get('CONFLUENCE_PARENT_PAGE_ID') ) - self.confluence_page_title_suffix = ( + self.confluence_page_title_suffix = str( args.confluence_page_title_suffix or os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or '(autogenerated)' ) - self.confluence_page_label = ( + self.confluence_page_label = str( args.confluence_page_label or os.environ.get('CONFLUENCE_PAGE_LABEL') or 'markdown2confluence' ) - self.confluence_root_page = ( + self.confluence_root_page = str( args.confluence_root_page or os.environ.get('CONFLUENCE_ROOT_PAGE') ) - self.markdown_folder = ( + self.markdown_folder = str( args.markdown_folder or os.environ.get('MARKDOWN_FOLDER') or './' ) - self.markdown_source_ref = ( + self.markdown_source_ref = str( args.markdown_source_ref or os.environ.get('MARKDOWN_SOURCE_REF') ) - self.confluence_ignorefile = ( + self.confluence_ignorefile = str( args.confluence_ignorefile or os.environ.get('CONFLUENCE_IGNOREFILE') ) @@ -66,14 +70,18 @@ def validate(self): missing_fields = [] required_fields = ['confluence_url', 'confluence_username', - 'confluence_password', 'confluence_space_id', - 'confluence_parent_page_id', + 'confluence_password', 'confluence_parent_page_id', 'confluence_page_title_suffix'] for key in required_fields: if not getattr(self, key): missing_fields.append(key) + if not (getattr(self, 'confluence_space_key') or + getattr(self, 'confluence_space_id')): + missing_fields.append( + 'confluence_space_key or confluence_space_id') + if missing_fields: raise ValueError("The following configuration fields are " "missing or empty: " + ", ".join(missing_fields)) @@ -99,6 +107,9 @@ def parse_args(): help="Confluence password") parser.add_argument( '--confluence-space-id', + help="Confluence space id") + parser.add_argument( + '--confluence-space-key', help="Confluence space key") parser.add_argument( '--confluence-parent-page-id', diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index 5861825..8258c7b 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -20,6 +20,18 @@ def __init__(self, confluence: Confluence | None = None): username=self.config.confluence_username, password=self.config.confluence_password, ) + + self.space_id: str = self.config.confluence_space_id + self.space_key: str = self.config.confluence_space_key + + if not self.space_id and self.space_key: + self.space_id = self.confluence.get_space_id_from_key( + self.space_key) + + if not self.space_key and self.space_id: + self.space_key = self.confluence.get_space_key_from_id( + self.space_id) + self.suffix = self.config.confluence_page_title_suffix self.label = self.config.confluence_page_label self.autogen_notice = ( @@ -36,7 +48,7 @@ def __init__(self, confluence: Confluence | None = None): def pre_publish_hook(self): cql = ( - f"space='{self.config.confluence_space_id}' " + f"space='{self.space_key}' " f"AND label='{self.label}' " f"AND title~'{self.suffix}'" ) @@ -71,18 +83,18 @@ def publish_node(self, node: ContentNode, parent_id: str | None) -> str: ) page = self._get_existing_page(title) - if page and page['id']: + if page: + page_id = page['id'] logger.debug( - f"Found existing page: {page['id']} matching title {title}") - self._update_page(page['id'], title, content, - parent_page, node) + f"Found existing page: {page_id} matching title {title}") + self._update_page(page_id, title, content, parent_page, node) else: - logger.debug( - f"Found no existing page for title {title}") + logger.debug(f"Found no existing page for title {title}") page_id = self._create_page(title, content, parent_page, node) if node.metadata: - self._attach_files(page_id, node.metadata.get('attachments', [])) + self._attach_files( + page_id, node.metadata.get('attachments', [])) return str(page_id) def _get_existing_page(self, title: str) -> dict | None: @@ -97,14 +109,10 @@ def _create_page(self, title: str, content: str, parent_id: int | None, logger.debug(f"creating page {title} with parent id {parent_id}") try: page = self.confluence.create_page( - space=self.config.confluence_space_id, + space=self.space_id, title=title, body=content, parent_id=parent_id, - type='page', - representation='storage', - editor='v2', - full_width=False ) page_id = str(page['id']) self.confluence.set_page_label(page_id, self.label) @@ -126,7 +134,6 @@ def _update_page(self, page_id: str, title: str, content: str, page = self.confluence.get_page_by_id(page_id) version = int(page['version']['number'] + 1) if 'version' in page else 1 - print("page: ", page, "version: ", version) self.confluence.update_page( page_id=page_id, title=title, @@ -150,13 +157,13 @@ def _attach_files(self, page_id: str, attachments: list[dict]): for attachment in attachments: logger.debug(attachment) - name = attachment['reference'] - filename = attachment['file_path'] + reference = attachment['reference'] + file_path = attachment['file_path'] - self.confluence.attach_file( - filename=filename, - name=name, + self.confluence.create_or_update_attachment( + file_path=file_path, page_id=page_id, + comment=reference, ) logger.info("Attached file %s with reference %s to page ID %s", - filename, name, page_id) + file_path, reference, page_id)