diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..afd9ca5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +__pycache__ +*.pyc +.env +venv +.venv +.vscode +.idea +.git +.github + diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..e8cea04 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,41 @@ +name: tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + pip install coverage + pip install . + + - name: Run tests + run: coverage run -m pytest + + - name: Make coverage report + run: coverage lcov + + - name: Comment coverage report on PR + if: ${{ github.event_name == 'pull_request' }} + uses: romeovs/lcov-reporter-action@v0.3.1 + with: + lcov-file: coverage.lcov + delete-old-comments: true diff --git a/.gptcontext b/.gptcontext index f9da741..25862e1 100644 --- a/.gptcontext +++ b/.gptcontext @@ -2,23 +2,13 @@ Additional context is provided below. Preferences for python code: - adhere to common style conventions, e.g. PEP8 -- keep lines under 80 characters long +- you MUST keep lines under 80 characters long Markdown2confluence pushes a folder containing markdown files and pushes them to confluence, with a page structure like the file and folder structure of the markdown files, and ignoring any non-markdown files. Required behavior: -All pages managed by markdown2confluence contains $CONFLUENCE_PAGE_TITLE_SUFFIX, e.g. '(autogenerated)'. New pages are created with this suffix, and on subsequent runs any pages with the suffix (or label, TBD) are overwritten or deleted. 
-Depending on how confluence labels work it might be best to use labels instead. If using labels, refuse to delete any pages that does not have the page title suffix. -Any markdown that contains full or relative links to local media files should be published as pages with attached media. Relative links in markdown to local media are resolved from the location of the markdown file. Full-path links in markdown are resolved from the $MARKDOWN_FOLDER - - -Currently I am working on: -- Publisher class in publish.py contains the old code for now, I am moving - functionality to the other classes. -- Change from directly using requests to using the confluence client from - atlassian -- Use labels instead of only relying on the suffix (previously called search - pattern) +- All pages managed by markdown2confluence contain a suffix, e.g. '(autogenerated)'. New pages are created with this suffix, and on subsequent runs any pages with the suffix (or label, TBD) are overwritten or deleted. Depending on how confluence labels work, it might be best to use labels instead. If using labels, refuse to delete any pages that do not have the page title suffix. +- Any markdown that contains full or relative links to local media files should be published as pages with attached media. Relative links in markdown to local media are resolved from the location of the markdown file. 
Full-path links in markdown are resolved from the $MARKDOWN_FOLDER file structure: @@ -29,12 +19,15 @@ markdown2confluence/ │ └── usage.md ├── LICENCE ├── markdown2confluence -│ ├── converter.py │ ├── __init_.py │ ├── main.py +│ ├── converter.py │ ├── confluence.py │ ├── config.py -│ ├── file_manager.py +│ ├── content_tree.py +│ ├── parser.py +│ ├── util.py +│ ├── version.py │ └── publisher.py ├── README.md ├── requirements.txt @@ -48,7 +41,7 @@ markdown2confluence/ │ └── test_integration.py └── unit ├── __init__.py - ├── test_file_manager.py + ├── test_parser.py ├── test_confluence.py └── test_publisher.py @@ -79,63 +72,123 @@ CONFLUENCE_IGNOREFILE #### Components and Their Key Interfaces -1. **ConfluenceClient** +1. **Publisher** -Responsible for direct interactions with the Confluence API, handling operations like page creation, updates, deletion, and labeling with retries and backoff for robustness. +Abstract Publisher class for publishing a content tree, respecting the ContentTree structure and managing page relationships. ```python -class ConfluenceClient: - def __init__(self, confluence_config: dict): - """Initialize with API configuration.""" - - def create_or_update_page(self, title: str, html: str, parent_id=None, space_key: str, labels=None) -> dict: - """Create or update a Confluence page, applying labels.""" - - def delete_page(self, page_id: str) -> dict: - """Delete a Confluence page by ID.""" +class Publisher: + @abstractmethod + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + pass + + def pre_publish_hook(self): + """ + Optional step for actions to perform before publishing, such as + fetching/deleting previously published resources. + Can be overridden by subclasses. + """ + pass + + def post_publish_hook(self): + """ + Optional step for actions to perform after publishing, such as + cleaning up resources or performing additional logging. + Can be overridden by subclasses. 
+ """ + pass + + def publish_content(self, content_tree: ContentTree): + """ + Traverse a content tree and call publish_node on each element. + """ + pass ``` -2. **Publisher** +2. **ConfluencePublisher** -Orchestrates the conversion of Markdown to HTML and the subsequent publishing to Confluence, respecting the original directory structure and managing page relationships. +Specialized publisher for Confluence. It implements the publish_node function, which is responsible for creating/updating pages with labels etc. in Confluence. ```python -class Publisher: - def __init__(self, confluence_client: ConfluenceClient, source_directory: str, space_key: str): - """Setup with Confluence client, source directory, and target space key.""" - - def publish(self): - """Main method to start the publishing process.""" - - def traverse_directory(self, directory: str, parent_id=None): - """Recursively traverse directories, converting and uploading Markdown files.""" +class ConfluencePublisher(Publisher): + def __init__(self, confluence: Confluence = None): + pass + + def pre_publish_hook(self): + """ + Specialized for this subclass. + Fetch all pages matching space, label and suffix + """ + + def post_publish_hook(self): + """ + Specialized for this subclass. + Delete pages not in the ContentTree + """ + + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + """ + Create or update pages, including attachments, ensuring labels on newly created pages. + """ + pass ``` -3. **FileManager** (unchanged, conceptual) +3. **Parser** -Handles file reading and potentially logging or other file outputs, and maybe traversing the file system +Responsible for parsing the source files from e.g. the file system. 
```python -class FileManager: - def read_file(self, path: str) -> str: - """Read the content of a file.""" +class Parser(ABC): + @abstractmethod + def parse_directory(self, directory: str) -> ContentTree: + pass + + +class MarkdownParser(Parser): + def parse_directory(self, directory: str) -> ContentTree: + pass ``` -### Workflow Overview with Snippets +4. **ContentTree** + +Defines the shared data structure for content between Parser and Publisher -- The process starts with `Publisher`, which is initialized with necessary configurations and an instance of `ConfluenceClient`. - ```python -publisher = Publisher(confluence_client=ConfluenceClient(confluence_config), source_directory="path/to/markdown", space_key="SPACEKEY") -publisher.publish() -``` +@dataclass +class ContentNode: + name: str + content: str | None = None + metadata: dict | None = None + parent: 'ContentNode | None' = None + children: dict[str, 'ContentNode'] = field(default_factory=dict) + + def add_child(self, node: 'ContentNode'): + pass + + def get_child(self, name: str) -> 'ContentNode | None': + pass -- `Publisher.publish()` begins the process, invoking `traverse_directory()` to walk through the directory structure, processing each Markdown file by converting it to HTML. + def is_leaf(self) -> bool: + pass -- For each processed file, `Publisher` uses `ConfluenceClient.create_or_update_page()` to either create a new page or update an existing one in Confluence, applying a predefined label to mark the page as managed by `markdown2confluence`. + def is_root(self) -> bool: + pass -- Should a page need to be deleted or labels added, `Publisher` utilizes other methods of `ConfluenceClient` like `delete_page()` and maybe `add_labels_to_page()`, ensuring the Confluence space remains synchronized with the source content. 
+ def __str__(self, level: int = 0) -> str: + pass -### Conclusion -This architecture, enriched with interface snippets, outlines a clear, modular approach to converting and managing Markdown content within Confluence, ensuring scalability and maintainability through well-defined responsibilities and robust Confluence API interactions. +@dataclass +class ContentTree: + root: ContentNode = field(default_factory=lambda: ContentNode('root')) + + def add_node(self, path_list: list, content: str | None = None, + metadata: dict | None = None): + pass + + def find_node(self, path_list: list) -> ContentNode | None: + pass + + def __str__(self) -> str: + pass +``` diff --git a/Dockerfile b/Dockerfile index 20d9dd6..a93902e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,18 @@ -FROM python:3.10-slim +FROM python:3.11-slim WORKDIR /app COPY requirements.txt /app/ - RUN pip install --no-cache-dir -r requirements.txt +COPY . /app/ + +# Install the current package +RUN pip install . + ENV CONFLUENCE_USERNAME="" ENV CONFLUENCE_PASSWORD="" -ENV CONFLUENCE_URL="https://yourdomain.atlassian.net/wiki/rest/api/" +ENV CONFLUENCE_URL="https://yourdomain.atlassian.net/wiki/" ENV CONFLUENCE_SPACE_ID="yourspace" ENV CONFLUENCE_PARENT_PAGE_ID="12345" ENV CONFLUENCE_PAGE_TITLE_SUFFIX="(autogenerated)" @@ -16,6 +20,4 @@ ENV CONFLUENCE_PAGE_LABEL="markdown2confluence" ENV MARKDOWN_FOLDER="./" ENV MARKDOWN_SOURCE_REF="" -COPY ./markdown2confluence /app - -CMD ["python", "/app/main.py"] +CMD ["python", "markdown2confluence/main.py"] diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..b3932d7 --- /dev/null +++ b/Pipfile @@ -0,0 +1,16 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +atlassian-python-api = "*" +markdown = "*" + +[dev-packages] +setuptools = "*" +pytest-watch = "*" +pytest = "*" + +[requires] +python_version = "3.11" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..6c4dcea --- /dev/null 
+++ b/Pipfile.lock @@ -0,0 +1,395 @@ +{ + "_meta": { + "hash": { + "sha256": "93a578c32fda5610b20068fede2f54e00397853170219c4019e59743cb50e67e" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "atlassian-python-api": { + "hashes": [ + "sha256:1c271ca9b1688acdaef09ad6f763570868a381394530d1fba49b5b104fffe54a", + "sha256:3c852f38ad8645887fbfe1526c12f2c1951ba06a24a1bbb36bdf7ccdc6d7b1ac" + ], + "index": "pypi", + "version": "==3.41.15" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", + "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==4.12.3" + }, + "certifi": { + "hashes": [ + "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", + "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.8.30" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + 
"sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", 
+ "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + 
"sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", 
+ "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "deprecated": { + "hashes": [ + "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c", + "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.2.14" + }, + "idna": { + "hashes": [ + "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac", + "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603" + ], + "markers": "python_version >= '3.6'", + "version": "==3.8" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "markdown": { + "hashes": [ + "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2", + "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803" + ], + "index": "pypi", + "version": "==3.7" + }, + "oauthlib": { + "hashes": [ + "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", + "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918" + ], + "markers": "python_version >= '3.6'", + "version": "==3.2.2" + }, + "requests": { + "hashes": [ + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" + ], + "markers": "python_version >= '3.8'", + "version": "==2.32.3" + }, + "requests-oauthlib": { + "hashes": [ + "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", + 
"sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9" + ], + "markers": "python_version >= '3.4'", + "version": "==2.0.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "soupsieve": { + "hashes": [ + "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", + "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9" + ], + "markers": "python_version >= '3.8'", + "version": "==2.6" + }, + "urllib3": { + "hashes": [ + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.2" + }, + "wrapt": { + "hashes": [ + "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc", + "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", + "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", + "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e", + "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca", + "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0", + "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb", + "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487", + "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40", + "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", + "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", + "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202", + "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41", + 
"sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", + "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", + "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664", + "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", + "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", + "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00", + "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", + "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", + "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267", + "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", + "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966", + "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", + "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228", + "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72", + "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", + "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292", + "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0", + "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0", + "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", + "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c", + "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5", + "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f", + "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", + "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", + "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2", + "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593", 
+ "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39", + "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", + "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf", + "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf", + "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", + "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c", + "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c", + "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f", + "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440", + "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465", + "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136", + "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b", + "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8", + "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", + "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8", + "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6", + "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e", + "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f", + "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c", + "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e", + "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", + "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2", + "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", + "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35", + "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", + 
"sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3", + "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537", + "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", + "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d", + "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a", + "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4" + ], + "markers": "python_version >= '3.6'", + "version": "==1.16.0" + } + }, + "develop": { + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "version": "==0.6.2" + }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "packaging": { + "hashes": [ + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" + ], + "markers": "python_version >= '3.8'", + "version": "==24.1" + }, + "pluggy": { + "hashes": [ + "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", + "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" + ], + "markers": "python_version >= '3.8'", + "version": "==1.5.0" + }, + "pytest": { + "hashes": [ + "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", + "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce" + ], + "index": "pypi", + "version": 
"==8.3.2" + }, + "pytest-watch": { + "hashes": [ + "sha256:06136f03d5b361718b8d0d234042f7b2f203910d8568f63df2f866b547b3d4b9" + ], + "index": "pypi", + "version": "==4.2.0" + }, + "setuptools": { + "hashes": [ + "sha256:5f4c08aa4d3ebcb57a50c33b1b07e94315d7fc7230f7115e47fc99776c8ce308", + "sha256:95b40ed940a1c67eb70fc099094bd6e99c6ee7c23aa2306f4d2697ba7916f9c6" + ], + "index": "pypi", + "version": "==74.1.2" + }, + "watchdog": { + "hashes": [ + "sha256:14dd4ed023d79d1f670aa659f449bcd2733c33a35c8ffd88689d9d243885198b", + "sha256:29e4a2607bd407d9552c502d38b45a05ec26a8e40cc7e94db9bb48f861fa5abc", + "sha256:3960136b2b619510569b90f0cd96408591d6c251a75c97690f4553ca88889769", + "sha256:3e8d5ff39f0a9968952cce548e8e08f849141a4fcc1290b1c17c032ba697b9d7", + "sha256:53ed1bf71fcb8475dd0ef4912ab139c294c87b903724b6f4a8bd98e026862e6d", + "sha256:5597c051587f8757798216f2485e85eac583c3b343e9aa09127a3a6f82c65ee8", + "sha256:638bcca3d5b1885c6ec47be67bf712b00a9ab3d4b22ec0881f4889ad870bc7e8", + "sha256:6bec703ad90b35a848e05e1b40bf0050da7ca28ead7ac4be724ae5ac2653a1a0", + "sha256:726eef8f8c634ac6584f86c9c53353a010d9f311f6c15a034f3800a7a891d941", + "sha256:72990192cb63872c47d5e5fefe230a401b87fd59d257ee577d61c9e5564c62e5", + "sha256:7d1aa7e4bb0f0c65a1a91ba37c10e19dabf7eaaa282c5787e51371f090748f4b", + "sha256:8c47150aa12f775e22efff1eee9f0f6beee542a7aa1a985c271b1997d340184f", + "sha256:901ee48c23f70193d1a7bc2d9ee297df66081dd5f46f0ca011be4f70dec80dab", + "sha256:963f7c4c91e3f51c998eeff1b3fb24a52a8a34da4f956e470f4b068bb47b78ee", + "sha256:9814adb768c23727a27792c77812cf4e2fd9853cd280eafa2bcfa62a99e8bd6e", + "sha256:aa9cd6e24126d4afb3752a3e70fce39f92d0e1a58a236ddf6ee823ff7dba28ee", + "sha256:b6dc8f1d770a8280997e4beae7b9a75a33b268c59e033e72c8a10990097e5fde", + "sha256:b84bff0391ad4abe25c2740c7aec0e3de316fdf7764007f41e248422a7760a7f", + "sha256:ba32efcccfe2c58f4d01115440d1672b4eb26cdd6fc5b5818f1fb41f7c3e1889", + "sha256:bda40c57115684d0216556671875e008279dea2dc00fcd3dde126ac8e0d7a2fb", + 
"sha256:c4a440f725f3b99133de610bfec93d570b13826f89616377715b9cd60424db6e", + "sha256:d010be060c996db725fbce7e3ef14687cdcc76f4ca0e4339a68cc4532c382a73", + "sha256:d2ab34adc9bf1489452965cdb16a924e97d4452fcf88a50b21859068b50b5c3b", + "sha256:d7594a6d32cda2b49df3fd9abf9b37c8d2f3eab5df45c24056b4a671ac661619", + "sha256:d961f4123bb3c447d9fcdcb67e1530c366f10ab3a0c7d1c0c9943050936d4877", + "sha256:dae7a1879918f6544201d33666909b040a46421054a50e0f773e0d870ed7438d", + "sha256:dcebf7e475001d2cdeb020be630dc5b687e9acdd60d16fea6bb4508e7b94cf76", + "sha256:f627c5bf5759fdd90195b0c0431f99cff4867d212a67b384442c51136a098ed7", + "sha256:f8b2918c19e0d48f5f20df458c84692e2a054f02d9df25e6c3c930063eca64c1", + "sha256:fb223456db6e5f7bd9bbd5cd969f05aae82ae21acc00643b60d81c770abd402b" + ], + "markers": "python_version >= '3.9'", + "version": "==5.0.2" + } + } +} diff --git a/README.md b/README.md index 8b3bd33..2100609 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,14 @@ # markdown2confluence +[![Tests](https://github.com/innofactororg/markdown2confluence/actions/workflows/test.yaml/badge.svg)](https://github.com/innofactororg/markdown2confluence/actions/workflows/test.yaml) +![Python](https://img.shields.io/badge/python-3.12-blue.svg) +[![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/) + Convert your Markdown files into Confluence pages with ease using the `markdown2confluence` script. It uploads all files from a specified Markdown directory to a Confluence space, applying a specific page label and maintaining the folder hierarchy as page structure. ## Prerequisites -Before you get started, you will need the space ID of your Confluence space and the ID of the parent page where you want to create new pages. +Before you get started, ensure that you have Python version 3.11 or higher installed on your system. You will also need the space ID of your Confluence space and the ID of the parent page where you want to create new pages. 
## Configuration @@ -16,17 +20,18 @@ Configure your environment with the following variables before running the scrip - `CONFLUENCE_SPACE_ID`: The key of the Confluence space where the pages will be created - `CONFLUENCE_PARENT_PAGE_ID`: The ID of the parent Confluence page under which new pages will be created - `MARKDOWN_FOLDER`: The path to the folder containing markdown files -- `MARKDOWN_SOURCE_REF`: The url of the markdown source, e.g. and url to your repo +- `MARKDOWN_SOURCE_REF`: The url of the markdown source, e.g., a URL to your repo - `CONFLUENCE_PAGE_LABEL`: A label to apply to the Confluence pages to manage them as a group Optionally, you can also set: - `CONFLUENCE_PAGE_TITLE_SUFFIX`: A suffix to append to the title of Confluence pages, e.g., '(autogenerated)' - `CONFLUENCE_IGNOREFILE`: Path to a file containing patterns to ignore when publishing -## Usage +## Usage (docker) To upload sample markdown files to Confluence, run the following Docker command, replacing `VAR1`, `VAR2`, etc., with your Confluence configuration values: + ```bash # Run from the root of this repo docker run --rm \ diff --git a/markdown2confluence/__init_.py b/markdown2confluence/__init_.py index c13685a..9359f50 100644 --- a/markdown2confluence/__init_.py +++ b/markdown2confluence/__init_.py @@ -6,5 +6,8 @@ from .main import main from .converter import Converter from .publisher import Publisher +from .confluence import ConfluencePublisher +from .parser import Parser, MarkdownParser from .config import Config - +from .util import Logger +from .content_tree import ContentTree, ContentNode diff --git a/markdown2confluence/api.py b/markdown2confluence/api.py new file mode 100644 index 0000000..7cad1f1 --- /dev/null +++ b/markdown2confluence/api.py @@ -0,0 +1,139 @@ +import requests +from requests.auth import HTTPBasicAuth +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + +class MinimalConfluence: + def __init__(self, url: str, username: str, 
password: str): + self.url = url if url.endswith('/') else url + '/' + self.api = requests.Session() + retries = Retry( + total=5, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504] + ) + self.api.mount('https://', HTTPAdapter(max_retries=retries)) + + if username and password: + self.api.auth = HTTPBasicAuth(username, password) + else: + raise ValueError( + 'Both username and password (api token) must be set.') + + def _request(self, method: str, path: str, **kwargs): + url = f'{self.url}{path}' + response = self.api.request(method, url, **kwargs) + response.raise_for_status() + + if response.status_code == 204: # No Content + return None + + return response.json() + + def _get(self, path: str, **kwargs): + return self._request('GET', path, **kwargs) + + def _post(self, path: str, **kwargs): + return self._request('POST', path, **kwargs) + + def _put(self, path: str, **kwargs): + return self._request('PUT', path, **kwargs) + + def _del(self, path: str, **kwargs): + return self._request('DELETE', path, **kwargs) + + def get_space_id_from_key(self, space_key: str) -> str: + path = 'api/v2/spaces' + response = self._get(path) + if response is None: + raise ValueError('Failed to retrieve spaces.') + + for space in response.get('results', []): + if space.get('key') == space_key: + return space.get('id') + raise ValueError(f'Space key {space_key} not found.') + + def get_space_key_from_id(self, space_id: str) -> str: + path = 'api/v2/spaces' + response = self._get(path) + if response is None: + raise ValueError('Failed to retrieve spaces.') + + for space in response.get('results', []): + if space.get('id') == space_id: + return space.get('key') + raise ValueError(f'Space ID {space_id} not found.') + + def search(self, cql: str): + path = 'rest/api/content/search' + params = {'cql': cql} + return self._get(path, params=params) + + def get_page_by_id(self, page_id: str): + path = f'rest/api/content/{page_id}' + return self._get(path) + + def 
create_page(self, space: str, title: str, body: str, + parent_id: int | None): + path = 'api/v2/pages' + data = { + 'spaceId': '294916', # space, + 'status': 'current', + 'title': title, + 'body': { + 'storage': { + 'value': body, + 'representation': 'storage' + } + } + } + if parent_id: + data['parentId'] = parent_id + return self._post(path, json=data) + + def update_page(self, page_id: str, title: str, parent_id: int | None, + body: str, version: int): + path = f'rest/api/content/{page_id}' + data = { + 'id': page_id, + 'status': 'current', + 'title': title, + 'type': 'page', + 'parentId': parent_id, + 'version': {'number': version}, + 'body': { + 'storage': { + 'value': body, + 'representation': 'storage' + } + } + } + return self._put(path, json=data) + + def remove_page(self, page_id: str): + path = f'api/v2/pages/{page_id}' + return self._del(path) + + def create_or_update_attachment(self, page_id: str, file_path: str, + comment: str | None = None): + path = f'rest/api/content/{page_id}/child/attachment' + files = {'file': open(file_path, 'rb')} + params = {'comment': comment} if comment else {} + return self._put(path, files=files, params=params) + + def get_attachments(self, page_id: str): + path = f'api/v2/pages/{page_id}/attachments' + return self._get(path) + + def update_attachment(self, attachment_id: str, file_path: str, + comment: str): + path = f'rest/api/content/{attachment_id}/data' + files = {'file': open(file_path, 'rb')} + params = {'comment': comment} if comment else {} + return self._post(path, files=files, params=params) + + def set_page_label(self, page_id: str, label: str): + path = f'rest/api/content/{page_id}/label' + data = {'prefix': 'global', 'name': label} + return self._post(path, json=data) diff --git a/markdown2confluence/config.py b/markdown2confluence/config.py index 8dae316..ceb2aee 100644 --- a/markdown2confluence/config.py +++ b/markdown2confluence/config.py @@ -1,6 +1,10 @@ import argparse import os +from 
markdown2confluence.util import Logger + +logger = Logger(__name__).get_logger() + class Config: def __init__(self, args=None): @@ -11,62 +15,72 @@ def __init__(self, args=None): args.confluence_url or os.environ.get('CONFLUENCE_URL', '') ).rstrip('/') - self.confluence_username = ( + self.confluence_username = str( args.confluence_username or os.environ.get('CONFLUENCE_USERNAME') ) - self.confluence_password = ( + self.confluence_password = str( args.confluence_password or os.environ.get('CONFLUENCE_PASSWORD') ) - self.confluence_space_id = ( + self.confluence_space_id = str( args.confluence_space_id or os.environ.get('CONFLUENCE_SPACE_ID') ) + self.confluence_space_key = str( + args.confluence_space_key or + os.environ.get('CONFLUENCE_SPACE_KEY') + ) self.confluence_parent_page_id = ( args.confluence_parent_page_id or os.environ.get('CONFLUENCE_PARENT_PAGE_ID') ) - self.markdown_folder = ( + self.confluence_page_title_suffix = str( + args.confluence_page_title_suffix or + os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or + '(autogenerated)' + ) + self.confluence_page_label = str( + args.confluence_page_label or + os.environ.get('CONFLUENCE_PAGE_LABEL') or + 'markdown2confluence' + ) + self.confluence_root_page = str( + args.confluence_root_page or + os.environ.get('CONFLUENCE_ROOT_PAGE') + ) + + self.markdown_folder = str( args.markdown_folder or os.environ.get('MARKDOWN_FOLDER') or './' ) - self.markdown_source_ref = ( + self.markdown_source_ref = str( args.markdown_source_ref or os.environ.get('MARKDOWN_SOURCE_REF') ) - self.confluence_ignorefile = ( + self.confluence_ignorefile = str( args.confluence_ignorefile or os.environ.get('CONFLUENCE_IGNOREFILE') ) - self.confluence_page_title_suffix = ( - args.confluence_page_title_suffix or - os.environ.get('CONFLUENCE_PAGE_TITLE_SUFFIX') or - '(autogenerated)' - ) - self.confluence_page_label = ( - args.confluence_page_label or - os.environ.get('CONFLUENCE_PAGE_LABEL') or - 'markdown2confluence' - ) + self.validate() 
def validate(self): missing_fields = [] - if not self.confluence_url: - missing_fields.append("confluence_url") - if not self.confluence_username: - missing_fields.append("confluence_username") - if not self.confluence_password: - missing_fields.append("confluence_password") - if not self.confluence_space_id: - missing_fields.append("confluence_space_id") - if not self.confluence_parent_page_id: - missing_fields.append("confluence_parent_page_id") - if not self.confluence_page_title_suffix: - missing_fields.append("confluence_page_title_suffix") + required_fields = ['confluence_url', 'confluence_username', + 'confluence_password', 'confluence_parent_page_id', + 'confluence_page_title_suffix'] + + for key in required_fields: + if not getattr(self, key): + missing_fields.append(key) + + if not (getattr(self, 'confluence_space_key') or + getattr(self, 'confluence_space_id')): + missing_fields.append( + 'confluence_space_key or confluence_space_id') if missing_fields: raise ValueError("The following configuration fields are " @@ -93,10 +107,21 @@ def parse_args(): help="Confluence password") parser.add_argument( '--confluence-space-id', + help="Confluence space id") + parser.add_argument( + '--confluence-space-key', help="Confluence space key") parser.add_argument( '--confluence-parent-page-id', - help="Parent page ID under which to add the new page") + help="Parent page ID under which to add all top-level pages") + parser.add_argument( + '--confluence-page-label', + help=("Label to assign to Confluence pages managed by " + "markdown2confluence")) + parser.add_argument( + '--confluence-root-page', + help=("Add a top-level page under which all pages will be organized.") + ) parser.add_argument( '--markdown-folder', help="File or folder containing Markdown files to publish") @@ -110,9 +135,5 @@ def parse_args(): '--confluence-page-title-suffix', help="Suffix for Confluence page titles, to denote pages " "managed by markdown2confluence") - parser.add_argument( - 
'--confluence-page-label', - help=("Label to assign to Confluence pages managed by " - "markdown2confluence")) return parser.parse_args() diff --git a/markdown2confluence/confluence.py b/markdown2confluence/confluence.py index b046edd..8258c7b 100644 --- a/markdown2confluence/confluence.py +++ b/markdown2confluence/confluence.py @@ -1,18 +1,169 @@ +from markdown2confluence.api import MinimalConfluence as Confluence +import requests +import hashlib -class ConfluenceClient: - def __init__(self, confluence_config: dict): - """Initialize with API configuration.""" - self.api_endpoint = confluence_config["api_endpoint"] - self.auth = (confluence_config["username"], - confluence_config["password"]) - - def create_or_update_page(self, title: str, html: str, parent_id=None, - space_key: str, labels=None) -> dict: - """Create or update a Confluence page, applying labels.""" - # Implementation for creating or updating a Confluence page - pass - - def delete_page(self, page_id: str) -> dict: - """Delete a Confluence page by ID.""" - # Implementation for deleting a Confluence page - pass +from markdown2confluence.util import Logger +from markdown2confluence.config import Config +from markdown2confluence.converter import Converter +from markdown2confluence.publisher import Publisher +from markdown2confluence.content_tree import ContentNode + + +logger = Logger(__name__).get_logger() + + +class ConfluencePublisher(Publisher): + def __init__(self, confluence: Confluence | None = None): + self.config = Config() + self.confluence = confluence or Confluence( + url=self.config.confluence_url, + username=self.config.confluence_username, + password=self.config.confluence_password, + ) + + self.space_id: str = self.config.confluence_space_id + self.space_key: str = self.config.confluence_space_key + + if not self.space_id and self.space_key: + self.space_id = self.confluence.get_space_id_from_key( + self.space_key) + + if not self.space_key and self.space_id: + self.space_key = 
self.confluence.get_space_key_from_id( + self.space_id) + + self.suffix = self.config.confluence_page_title_suffix + self.label = self.config.confluence_page_label + self.autogen_notice = ( + "" + "Do not make changes here" + "" + "

This page is autogenerated. Make changes in the " + f"GitHub repository

" + "
" + "
" + ) + + logger.info("Initialized Publisher") + + def pre_publish_hook(self): + cql = ( + f"space='{self.space_key}' " + f"AND label='{self.label}' " + f"AND title~'{self.suffix}'" + ) + self.stale_pages: list[dict[str, any]] = self.confluence.search( + cql).get('results', []) + logger.info("Fetched %d stale pages", len(self.stale_pages)) + logger.debug("Stale pages: %s", self.stale_pages) + + def post_publish_hook(self): + logger.debug(f"Found {len(self.stale_pages)} remaining stale pages") + for page in self.stale_pages: + page_id = page['id'] + title = page['title'] + + if not title.endswith(self.suffix): + logger.warning("Skipping deletion of unmanaged page %s", title) + continue + + self.confluence.remove_page(page_id) + logger.info("Deleted unmanaged page %s", title) + + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + identifier = f"{node.name}{node.parent.name if node.parent else None}{self.config.confluence_root_page}" + hash = hashlib.md5(identifier.encode('utf-8')).hexdigest()[:3] + + title = f"{node.name} #{hash} {self.suffix}" + content = Converter.convert_markdown_to_html(node.content or "") + content = self.autogen_notice + content + parent_page = ( + int(parent_id) if parent_id is not None + else self.config.confluence_parent_page_id + ) + + page = self._get_existing_page(title) + if page: + page_id = page['id'] + logger.debug( + f"Found existing page: {page_id} matching title {title}") + self._update_page(page_id, title, content, parent_page, node) + else: + logger.debug(f"Found no existing page for title {title}") + page_id = self._create_page(title, content, parent_page, node) + + if node.metadata: + self._attach_files( + page_id, node.metadata.get('attachments', [])) + return str(page_id) + + def _get_existing_page(self, title: str) -> dict | None: + for page in self.stale_pages: + if page['title'] == title: + self.stale_pages.remove(page) + return page + return None + + def _create_page(self, title: str, content: str, 
parent_id: int | None, + node: ContentNode) -> str: + logger.debug(f"creating page {title} with parent id {parent_id}") + try: + page = self.confluence.create_page( + space=self.space_id, + title=title, + body=content, + parent_id=parent_id, + ) + page_id = str(page['id']) + self.confluence.set_page_label(page_id, self.label) + logger.info("Created page %s with ID %s", title, page_id) + return page_id + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + logger.error("Error creating page: %s", e.response.text) + raise ValueError( + "Failed to create page due to bad request.") from e + else: + logger.error("HTTP error occurred: %s", e.response.text) + raise + + def _update_page(self, page_id: str, title: str, content: str, + parent_id: int | None, node: ContentNode): + logger.debug(f"updating page {node.name} with parent {page_id}") + try: + page = self.confluence.get_page_by_id(page_id) + version = int(page['version']['number'] + + 1) if 'version' in page else 1 + self.confluence.update_page( + page_id=page_id, + title=title, + body=content, + parent_id=parent_id, + version=version, + ) + self.confluence.set_page_label(page_id, self.label) + logger.info("Updated page %s with ID %s and label %s", + title, page_id, self.label) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + logger.error("Error updating page: %s", e.response.text) + raise ValueError( + "Failed to update page due to bad request.") from e + else: + logger.error("HTTP error occurred: %s", e.response.text) + raise + + def _attach_files(self, page_id: str, attachments: list[dict]): + for attachment in attachments: + logger.debug(attachment) + + reference = attachment['reference'] + file_path = attachment['file_path'] + + self.confluence.create_or_update_attachment( + file_path=file_path, + page_id=page_id, + comment=reference, + ) + logger.info("Attached file %s with reference %s to page ID %s", + file_path, reference, page_id) diff --git 
a/markdown2confluence/content_tree.py b/markdown2confluence/content_tree.py new file mode 100644 index 0000000..b4d3a96 --- /dev/null +++ b/markdown2confluence/content_tree.py @@ -0,0 +1,74 @@ +from dataclasses import dataclass, field + + +@dataclass +class ContentNode: + name: str + content: str | None = None + metadata: dict | None = None + parent: 'ContentNode | None' = None + children: dict[str, 'ContentNode'] = field(default_factory=dict) + + def add_child(self, node: 'ContentNode'): + node.parent = self + self.children[node.name] = node + + def get_child(self, name: str) -> 'ContentNode | None': + return self.children.get(name) + + def is_leaf(self) -> bool: + return not self.children + + def is_root(self) -> bool: + return self.parent is None + + def _set_name(self, name: str): + self.name = name + return + + def __str__(self, level: int = 0) -> str: + ret = "\t" * level + repr(self.name) + "\n" + for child in self.children.values(): + ret += child.__str__(level + 1) + return ret + + +@dataclass +class ContentTree: + root: ContentNode = field(default_factory=lambda: ContentNode('root')) + + def add_node(self, path_list: list, content: str | None = None, + metadata: dict | None = None): + if not path_list: + raise ValueError("Path list cannot be empty.") + current_node = self.root + for part in path_list: + if not part: + raise ValueError("Path components must be non-empty strings.") + next_node = current_node.get_child(part) + if not next_node: + next_node = ContentNode(name=part) + current_node.add_child(next_node) + current_node = next_node + if current_node is self.root: + raise ValueError("Cannot add content to the root node.") + current_node.content = content + current_node.metadata = metadata + + def find_node(self, path_list: list) -> ContentNode | None: + if not path_list: + raise ValueError("Path list cannot be empty.") + current_node = self.root + for part in path_list: + if not part: + raise ValueError("Path components must be non-empty strings.") 
+ current_node = current_node.get_child(part) + if current_node is None: + return None + return current_node + + def rename_root(self, name: str): + self.root._set_name(name) + + def __str__(self) -> str: + return str(self.root) diff --git a/markdown2confluence/converter.py b/markdown2confluence/converter.py index c7612ed..9513b01 100644 --- a/markdown2confluence/converter.py +++ b/markdown2confluence/converter.py @@ -1,4 +1,8 @@ import markdown +from markdown.extensions.codehilite import CodeHiliteExtension +from markdown.extensions.extra import ExtraExtension +from markdown.extensions.meta import MetaExtension +from markdown.extensions.toc import TocExtension class Converter: @@ -8,7 +12,8 @@ def __init__(self): """Initialize the converter.""" pass - def convert_markdown_to_html(self, markdown_content): + @staticmethod + def convert_markdown_to_html(markdown_content): """Convert Markdown content to HTML. Args: @@ -17,5 +22,13 @@ def convert_markdown_to_html(self, markdown_content): Returns: str: HTML content generated from the Markdown. 
""" - html_content = markdown.markdown(markdown_content) + extensions = [ + CodeHiliteExtension(linenums=False, guess_lang=False), + ExtraExtension(), + MetaExtension(), + TocExtension(permalink=True) + ] + html_content = markdown.markdown( + markdown_content, extensions=extensions) return html_content + diff --git a/markdown2confluence/file_manager.py b/markdown2confluence/file_manager.py deleted file mode 100644 index 8e3b1e4..0000000 --- a/markdown2confluence/file_manager.py +++ /dev/null @@ -1,5 +0,0 @@ -class FileManager: - def read_file(self, path: str) -> str: - """Read the content of a file.""" - with open(path, 'r', encoding='utf-8') as file: - return file.read() diff --git a/markdown2confluence/logo.py b/markdown2confluence/logo.py new file mode 100644 index 0000000..685a71c --- /dev/null +++ b/markdown2confluence/logo.py @@ -0,0 +1,19 @@ +LOGO_TEXT = """ + _ _ + _ __ ___ __ _ _ __| | ____| | _____ ___ __ +| '_ ` _ \\ / _` | '__| |/ / _` |/ _ \\ \\ /\\ / / '_ \\ +| | | | | | (_| | | | < (_| | (_) \\ V V /| | | | +|_| |_| |_|\\__,_|_| |_|\\_\\__,_|\\___/ \\_/\\_/ |_| |_| + + ____ + |___ \\ + __) | + / __/ + |_____| + + __ _ + ___ ___ _ __ / _| |_ _ ___ _ __ ___ ___ + / __/ _ \\| '_ \\| |_| | | | |/ _ \\ '_ \\ / __/ _ \\ +| (_| (_) | | | | _| | |_| | __/ | | | (_| __/ + \\___\\___/|_| |_|_| |_|\\__,_|\\___|_| |_|\\___\\___| +""" diff --git a/markdown2confluence/main.py b/markdown2confluence/main.py index 20e73be..77779f6 100644 --- a/markdown2confluence/main.py +++ b/markdown2confluence/main.py @@ -1,40 +1,31 @@ -import logging -import time +from markdown2confluence.confluence import ConfluencePublisher as Publisher +from markdown2confluence.parser import MarkdownParser as Parser +from markdown2confluence.logo import LOGO_TEXT +from markdown2confluence.util import Logger +from markdown2confluence.config import Config -from config import Config -from converter import Converter -from publisher import Publisher +import importlib.metadata + +logger = 
Logger("main").get_logger() +version = importlib.metadata.version("markdown2confluence") + + +def logo_and_version(): + logger.info(LOGO_TEXT) def main(): + logo_and_version() + logger.info(f"Started markdown2confluence version: {version}") + config = Config() + directory = config.markdown_folder + + logger.info("Parsing folder %s", directory) + content = Parser().parse_directory(directory) - converter = Converter() - publisher = Publisher( - url=config.confluence_url, - username=config.confluence_username, - password=config.confluence_password, - space_id=config.confluence_space_id, - parent_page_id=config.confluence_parent_page_id, - page_title_suffix=config.confluence_page_title_suffix, - page_label=config.confluence_page_label, - markdown_folder=config.markdown_folder, - markdown_source_ref=config.markdown_source_ref, - confluence_ignorefile=config.confluence_ignorefile) - - logging.basicConfig(level=logging.INFO) - logging.debug(config) - - pages = publisher.search_pages() - publisher.delete_pages(pages_id_list=pages) - - time.sleep(5) # Sleep for 5 seconds to allow the delete to fully complete - - # Publish the markdown files from the specified folder - publisher.publish_folder( - folder=config.markdown_folder, - parent_page_id=config.confluence_parent_page_id - ) + logger.info("Publishing content from directory %s", directory) + Publisher().publish_content(content) if __name__ == "__main__": diff --git a/markdown2confluence/parser.py b/markdown2confluence/parser.py new file mode 100644 index 0000000..099614d --- /dev/null +++ b/markdown2confluence/parser.py @@ -0,0 +1,70 @@ +import os +import re + +from abc import ABC, abstractmethod +from collections.abc import Iterator + +from markdown2confluence.util import Logger +from markdown2confluence.content_tree import ContentTree + +logger = Logger(__name__).get_logger() + + +class Parser(ABC): + @abstractmethod + def parse_directory(self, directory: str) -> ContentTree: + pass + + +class MarkdownParser(Parser): + 
+ def parse_directory(self, directory: str) -> ContentTree: + content_tree = ContentTree() + for file_path in self._get_markdown_files(directory): + content = self._read_file_content(file_path) + path_list = self._get_relative_path_as_list(file_path, directory) + attachments = [] + for ref in self._get_media_references(content): + file_path = os.path.join(directory, ref) + attachments.append({ + "reference": ref, + "file_path": file_path, + }) + + content_tree.add_node( + path_list=path_list, + content=content, + metadata={'attachments': attachments} + ) + return content_tree + + def _get_markdown_files(self, directory: str) -> Iterator[str]: + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.md'): + yield os.path.join(root, file) + + def _read_file_content(self, file_path: str) -> str: + if not os.path.exists(file_path): + raise FileNotFoundError(f"The file {file_path} was not found.") + with open(file_path, 'r', encoding='utf-8') as md_file: + return md_file.read() + + def _get_media_references(self, markdown: str) -> list[str]: + files_to_upload = [] + + for line in markdown.splitlines(): + match = re.search( + r"!\[.*?\]\((?!http)(.*?\.(?:jpg|jpeg|png|gif|bmp|svg|webp|tiff))\)", # noqa E501 + line + ) + if match: + file_path = match.group(1) + logger.debug(f"Found file for attaching: {file_path}") + files_to_upload.append(file_path) + + return files_to_upload + + def _get_relative_path_as_list( + self, file_path: str, base_directory: str) -> list[str]: + return os.path.relpath(file_path, base_directory).split(os.sep) diff --git a/markdown2confluence/publisher.py b/markdown2confluence/publisher.py index ee87fd4..4c4e31c 100644 --- a/markdown2confluence/publisher.py +++ b/markdown2confluence/publisher.py @@ -1,435 +1,61 @@ -import backoff -import json -import logging -import os -import random -import re -import requests -import string +from abc import ABC, abstractmethod -from atlassian import Confluence -from markdown import 
markdown -from pathspec import PathSpec -from pathspec.patterns import GitWildMatchPattern -from requests.auth import HTTPBasicAuth +from markdown2confluence.util import Logger +from markdown2confluence.config import Config +from markdown2confluence.content_tree import ContentTree, ContentNode -# Set up basic configuration for logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +logger = Logger(__name__).get_logger() -class Publisher: - def __init__(self, url, username, password, space_id, - parent_page_id, page_title_suffix, - page_label, markdown_folder, - markdown_source_ref, confluence_ignorefile): - self.confluence = Confluence( - url=url, - username=username, - password=password - ) - self.space_id = space_id - self.parent_page_id = parent_page_id - self.page_title_suffix = page_title_suffix - self.page_label = page_label +class Publisher(ABC): + def __init__(self, config: Config | None = None): + self.config = config if config is not None else Config() - # TODO: remove and use confluence client - self.url = url - self.username = username - self.password = password - self.markdown_folder = markdown_folder - self.markdown_source_ref = markdown_source_ref + @abstractmethod + def publish_node(self, node: ContentNode, parent_id: str | None) -> str: + pass - self.ignore_patterns = self.load_ignore_patterns(confluence_ignorefile) - - # def publish_page(self, title, content): - # title_with_suffix = f"{title}{self.page_title_suffix}" - # existing_page = self.confluence.get_page_by_title( - # space=self.space_id, - # title=title_with_suffix, - # expand='version' - # ) - # if existing_page: - # return - # # return self.update_page( - # # page_id=existing_page['id'], - # # title=title_with_suffix, - # # content=content - # # ) - # else: - # return self.confluence.create_page( - # space=self.space_id, - # title=title_with_suffix, - # body=content, - # parent_id=self.parent_page_id, - # type='page' - # ) - # - # def update_page(self, 
page_id, title, content): - # return self.confluence.update_page( - # page_id=page_id, - # title=title, - # body=content, - # type='page', - # ) - # - # def delete_page(self, page_id): - # return self.confluence.remove_page(page_id) - # - # - - def create_page(self, title, content, parent_page_id): - - # descripe json query - newPageJSONQueryString = """ - { - "type": "page", - "title": "DEFAULT PAGE TITLE", - "ancestors": [ - { - "id": 111 - } - ], - "space": { - "key": "DEFAULT KEY" - }, - "body": { - "storage": { - "value": "DEFAULT PAGE CONTENT", - "representation": "storage" - } - } - } + def pre_publish_hook(self): """ - - # load json from string - newPagejsonQuery = json.loads(newPageJSONQueryString) - - # the key of Confluence space for content publishing - newPagejsonQuery['space']['key'] = self.space_id - - # check of input of the ParentPageID - if parent_page_id is None: - # this is the root of out pages tree - newPagejsonQuery['ancestors'][0]['id'] = self.parent_page_id - else: - newPagejsonQuery['ancestors'][0]['id'] = str( - parent_page_id) # this is the branch of our tree - - newPagejsonQuery['title'] = title + " " + \ - self.page_title_suffix + \ - " #" + self.generate_random_string(length=3) - - # add content if the page from the input parameter - newPagejsonQuery['body']['storage']['value'] = ( - '' - 'Do not make changes here' - '' - '

This page is autogenerated. Make changes in the ' - f'GitHub repository

' - '
' - '
' + content - ) - - logging.info("Create new page: " + newPagejsonQuery['title']) - logging.debug("with content: " + - newPagejsonQuery['body']['storage']['value']) - logging.debug(json.dumps(newPagejsonQuery, indent=4, sort_keys=True)) - - # make call to create new page - logging.debug("Calling URL: " + self.url+"/content/") - - response = requests.post( - url=self.url+"/content/", - json=newPagejsonQuery, - auth=HTTPBasicAuth(self.username, self.password), - verify=True) - - logging.debug(response.status_code) - if response.status_code == 200: - logging.info("Created successfully") - logging.debug(json.dumps(json.loads( - response.text), indent=4, sort_keys=True)) - - # return new page id - logging.debug("Returning created page id: " + - json.loads(response.text)['id']) - return json.loads(response.text)['id'] - - # - # Function for searching pages with SEARCH TEST in the title - # - - def search_pages(self): - # make call using Confluence query language - # GET /rest/api/search?cql=text~%7B%22SEARCH%20PATTERN%22%7D+and+type=page+and+space=%2212345%22&limit=1000 HTTP/1.1" 200 - # "cqlQuery": "parent=301176119 and text~{\"SEARCH PATTERN\"} and type=page and space=\"12345\"" - - logging.debug("Calling URL: " + self.url + "/search?cql=parent=" + self.parent_page_id + - "+and+text~{\"" + self.page_title_suffix + - "\"}+and+type=page+and+space=\"" + - self.space_id + - "\"&limit=1000") - - def fatal_code(e): - return not 500 <= e.response.status_code < 600 - - # Exponential backoff for timeouts and server errors (500-599), fail on fatal errors - - @backoff.on_exception(backoff.expo, requests.exceptions.Timeout, max_tries=8) - @backoff.on_exception(backoff.expo, - requests.exceptions.RequestException, - giveup=fatal_code, - max_tries=4) - def get_request(url, auth): - response = requests.get( - url=url, - auth=auth, - verify=True - ) - # Raise an HTTPError for bad responses so it can be caught by backoff or fail the script - response.raise_for_status() - return 
response - - # Modify your existing code structure to use the get_request function - try: - response = get_request( - url=self.url + "/search?cql=text~{\"" + self.page_title_suffix + - "\"}+and+type=page+and+space=\"" + - self.space_id + - "\"&limit=1000", - auth=HTTPBasicAuth(self.username, self.password) - ) - except requests.exceptions.HTTPError as http_err: - logger.error(f"HTTP error occurred: {http_err}") - raise SystemExit(http_err) - except requests.exceptions.ConnectionError as conn_err: - logger.error(f"Connection error occurred: {conn_err}") - raise SystemExit(conn_err) - except requests.exceptions.Timeout as timeout_err: - # Should not reach here if `max_tries` has not been exceeded - logger.error( - f"Timeout error occurred after retries: {timeout_err}") - raise SystemExit(timeout_err) - except requests.exceptions.RequestException as req_err: - logger.error(f"Error making request: {req_err}") - raise SystemExit(req_err) - - logging.debug(response.status_code) - logging.debug(response.text) - logging.debug(json.dumps(json.loads( - response.text), indent=4, sort_keys=True)) - - # extract page's IDs from response JSON - results = json.loads(response.text) - foundPages = [] - - for result in results['results']: - foundPages.append(result['content']['id']) # add found page id - logging.info("Found page: " + result['content']['id'] + - " with title: " + result['content']['title']) - - logging.debug("Found pages in space " + self.space_id + " and parent page: " + - self.parent_page_id + " and search text: " + - self.page_title_suffix + ": " + str(foundPages)) - - return foundPages - - def delete_pages(self, pages_id_list): - - deletedPages = [] - - for page in pages_id_list: - logging.info("Delete page: " + str(page)) - logging.debug("Calling URL: " + - self.url + "/content/" + str(page)) - response = requests.delete( - url=self.url + "/content/" + str(page), - auth=HTTPBasicAuth(self.username, self.password), - verify=True) - logging.debug("Delete status 
code: " + str(response.status_code)) - if response.status_code == 204: - logging.info("Deleted successfully") - - return deletedPages - - def attach_file(self, page_id, attached_file): + Optional step for actions to perform before publishing, such as + fetching/deleting previously published resources. + Can be overridden by subclasses. """ - Attach a file to a Confluence page. + pass - Args: - page_id (str): ID of the Confluence page to attach the file to. - attached_file (file): The file to be attached. - - Returns: - str: The ID of the attached file or None if the attachment failed. + def post_publish_hook(self): """ + Optional step for actions to perform after publishing, such as + cleaning up resources or performing additional logging. + Can be overridden by subclasses. + """ + pass - # Construct the API endpoint URL - api_url = f"{self.url}/content/{page_id}/child/attachment" - - # Log the API call - logging.debug(f"Calling URL: {api_url}") - - # Set up file and comment data, headers, and disable SSL verification - attached_file_structure = {'file': attached_file} - attached_values = {'comment': 'File was attached by the script'} - - # TODO: Why do we need nocheck? 
document properly or remove - attached_header = { - "Accept": "application/json", - "X-Atlassian-Token": "nocheck" # Disable token check to avoid 403 status code - } - - # Make the POST request to attach the file - response = requests.post( - url=api_url, - files=attached_file_structure, - data=attached_values, - auth=HTTPBasicAuth(self.username, self.password), - headers=attached_header, - verify=True # Not recommended in production - ) - - # Log the response status code - logging.debug(response.status_code) - - if response.status_code == 200: - # Log success and parse JSON response - logging.info("File was attached successfully") - response_data = json.loads(response.text) - logging.debug(json.dumps(response_data, indent=4, sort_keys=True)) - - # Extract and return the ID of the attached file - attached_file_id = response_data['results'][0]['id'] - logging.debug(f"Returning attached file id: {attached_file_id}") - return attached_file_id - else: - # Log failure and return None - logging.error("File has not been attached") - return None - - # Confluence pages need unique titles - add some random strings at the end - - def generate_random_string(self, length=10): - return ''.join(random.choices(string.ascii_letters + string.digits, k=length)) - - def load_ignore_patterns(self, path): - if not path: - return [] - - patterns = [] - try: - with open(path, 'r') as file: - patterns = [line.strip() for line in file if line.strip() - and not line.startswith('#')] - print("loaded ignorepatterns", patterns) - except FileNotFoundError: - print(f"Unable to locate {path}, no patterns to ignore.") - return patterns - - # NOTE: Move this to __init__ when refactoring to Publisher class - - # Function to check if a file matches any of the ignore patterns - - def is_ignored(self, file_path): - spec = PathSpec.from_lines(GitWildMatchPattern, self.ignore_patterns) - return spec.match_file(file_path) - - def folderContainsMarkdown(self, folder_path): - for entry in 
os.scandir(folder_path): - if entry.is_dir() and self.folderContainsMarkdown(entry.path): - return True - elif entry.is_file() and entry.name.endswith('.md'): - return True - return False - - def publish_folder(self, folder, parent_page_id): - logging.info(f"Publishing folder: {folder}") - for entry in os.scandir(folder): - if self.is_ignored(entry.path): - return - if entry.is_dir(): - # Recursively publish directories that contain markdown files - if self.folderContainsMarkdown(entry.path): - self.publish_directory(entry, parent_page_id) - - elif entry.is_file() and entry.name.endswith('.md'): - # Publish only markdown files - self.publish_file(entry, parent_page_id) - - elif entry.is_symlink(): - logging.info(f"Found symlink: {entry.path}") - - def publish_directory(self, entry, parent_page_id): - logging.info(f"Found directory: {entry.path}") - current_page_id = self.create_page( - title=entry.name, - content="", - parent_page_id=parent_page_id, - ) - self.publish_folder(entry.path, current_page_id) - - def publish_file(self, entry, parent_page_id): - logging.info(f"Found file: {entry.path}") - - if entry.name.lower().endswith('.md'): - self.process_markdown_file(entry, parent_page_id) - else: - logging.info( - f"File: {entry.path} is not a MD file. 
Publishing has been rejected.") - - def process_markdown_file(self, entry, parent_page_id): - new_file_content, files_to_upload = self.process_markdown_content( - entry.path) - - page_id_for_file_attaching = self.create_page( - title=entry.name, - content=markdown(new_file_content, extensions=[ - 'markdown.extensions.tables', 'fenced_code']), - parent_page_id=parent_page_id, - ) + def publish_content(self, content_tree: ContentTree): + logger.debug("ContentTree:\n%s", content_tree) - self.upload_attachments(files_to_upload, page_id_for_file_attaching) + root_page = self.config.confluence_root_page + if root_page: + content_tree.rename_root(root_page) - def process_markdown_content(self, file_path): - new_file_content = "" - files_to_upload = [] + self.pre_publish_hook() - with open(file_path, 'r', encoding="utf-8") as md_file: - for line in md_file: - result = re.findall(r"\A!\[.*]\((?!http)(.*)\)", line) - if result: - result = result[0] - logging.debug(f"Found file for attaching: {result}") - print(f"Found file for attaching: {result}") - files_to_upload.append(result) - new_file_content += f" " - else: - new_file_content += line + def traverse_and_publish( + node: ContentNode, + parent_id: str | None = None): + logger.debug("Processing node: %s", node.name) - return new_file_content, files_to_upload + if node.is_root(): + root_page = self.config.confluence_root_page + if root_page not in (None, ''): + parent_id = self.publish_node( + node, self.config.confluence_parent_page_id) + else: + parent_id = self.publish_node(node, parent_id) - def upload_attachments(self, files_to_upload, page_id_for_file_attaching): - if files_to_upload: - for file in files_to_upload: - print("file: ", file) + for child in node.children.values(): + traverse_and_publish(child, parent_id) - # NOTE: Find the problem that this solves and fix it in a better way - if file.startswith('/'): - file = '.' 
+ file + traverse_and_publish(content_tree.root) - image_path = os.path.join( - self.markdown_folder, file) - if os.path.isfile(image_path): - logging.info( - f"Attaching file: {image_path} to the page: {page_id_for_file_attaching}") - with open(image_path, 'rb') as attached_file: - self.attach_file( - page_id=page_id_for_file_attaching, - attached_file=attached_file, - ) - else: - logging.error( - f"File: {image_path} not found. Nothing to attach") + self.post_publish_hook() diff --git a/markdown2confluence/util.py b/markdown2confluence/util.py new file mode 100644 index 0000000..fed5b5f --- /dev/null +++ b/markdown2confluence/util.py @@ -0,0 +1,51 @@ +import os +from logging.handlers import RotatingFileHandler +import logging + + +class Logger: + def __init__( + self, + name: str, + log_file: str = 'markdown2confluence.log', + level: int = logging.DEBUG if os.getenv("DEBUG") else logging.INFO + ): + """ + Initialize the Logger with a specified name and log file. + + :param name: Name of the logger, usually __name__ is passed + to get the module's name. + :param log_file: File path for the log file. + :param level: Logging level, default is logging.INFO. 
+ """ + self.logger = logging.getLogger(name) + self.logger.setLevel(level) + + # Create log directory if it doesn't exist + log_dir = os.path.dirname(log_file) + if log_dir and not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Create a file handler which logs even debug messages + file_handler = RotatingFileHandler( + log_file, maxBytes=1024*1024*5, backupCount=5) + file_handler.setLevel(level) + + console_handler = logging.StreamHandler() + console_handler.setLevel(level) + + # Create formatter and add it to the handlers + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + file_handler.setFormatter(formatter) + console_handler.setFormatter(formatter) + + # Add the handlers to the logger + self.logger.addHandler(file_handler) + self.logger.addHandler(console_handler) + + def get_logger(self): + """ + Return the configured logger. + """ + return self.logger diff --git a/markdown2confluence/version.py b/markdown2confluence/version.py new file mode 100644 index 0000000..d3ec452 --- /dev/null +++ b/markdown2confluence/version.py @@ -0,0 +1 @@ +__version__ = "0.2.0" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..570bfdd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,22 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "markdown2confluence" +version = "0.2.0-rc.5" +description = "A tool to publish markdown to Confluence pages" +readme = "README.md" +license = {text = "MIT"} +authors = [ + { name="Otto Lote", email="otto.lote@innofactor.com" } +] + +dependencies = [ + # List your dependencies here + # "requests>=2.23.0", + # "beautifulsoup4>=4.9.0" +] + +[project.scripts] +markdown2confluence = "markdown2confluence.main:main_function" diff --git a/requirements.txt b/requirements.txt index c185131..843be60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,16 @@ -atlassian-python-api==3.41.11 -backoff==2.2.1 
-beautifulsoup4==4.12.3 -certifi==2024.2.2 -charset-normalizer==3.3.2 -Deprecated==1.2.14 -idna==3.7 -jmespath==1.0.1 -Markdown==3.6 -oauthlib==3.2.2 -pathspec==0.12.1 -requests==2.31.0 -requests-oauthlib==2.0.0 -six==1.16.0 -soupsieve==2.5 -urllib3==2.2.1 -wrapt==1.16.0 +-i https://pypi.org/simple +atlassian-python-api==3.41.13 +beautifulsoup4==4.12.3 ; python_full_version >= '3.6.0' +certifi==2024.2.2 ; python_version >= '3.6' +charset-normalizer==3.3.2 ; python_full_version >= '3.7.0' +deprecated==1.2.14 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +idna==3.7 ; python_version >= '3.5' +jmespath==1.0.1 ; python_version >= '3.7' +markdown==3.6 +oauthlib==3.2.2 ; python_version >= '3.6' +requests==2.32.3 ; python_version >= '3.8' +requests-oauthlib==2.0.0 ; python_version >= '3.4' +six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +soupsieve==2.5 ; python_version >= '3.8' +urllib3==2.2.1 ; python_version >= '3.8' +wrapt==1.16.0 ; python_version >= '3.6' diff --git a/setup.py b/setup.py deleted file mode 100644 index 8960abb..0000000 --- a/setup.py +++ /dev/null @@ -1,15 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='markdown2confluence', - version='0.1.0-alpha', - packages=find_packages(), - install_requires=[ - # dependencies - ], - entry_points={ - 'console_scripts': [ - 'markdown2confluence = markdown2confluence.main:main_function', - ], - }, -) diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py new file mode 100644 index 0000000..ece121f --- /dev/null +++ b/tests/unit/test_api.py @@ -0,0 +1,95 @@ +import unittest +from unittest.mock import patch, MagicMock +from markdown2confluence.api import MinimalConfluence +from requests.exceptions import HTTPError + + +class TestMinimalConfluence(unittest.TestCase): + + def setUp(self): + self.url = 'https://confluence.example.com' + self.username = 'user' + self.password = 'pass' + self.confluence = MinimalConfluence(self.url, 
self.username, self.password) + + @patch('markdown2confluence.api.requests.Session.request') + def test_request(self, mock_request): + mock_request.return_value.status_code = 200 + mock_request.return_value.json.return_value = {'key': 'value'} + self.assertEqual( + self.confluence._request('GET', 'path'), + {'key': 'value'} + ) + + mock_request.return_value.raise_for_status.side_effect = HTTPError + with self.assertRaises(HTTPError): + self.confluence._request('GET', 'path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_get(self, mock_request): + self.confluence._get('path') + mock_request.assert_called_with('GET', f'{self.url}/path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_post(self, mock_request): + self.confluence._post('path') + mock_request.assert_called_with('POST', f'{self.url}/path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_put(self, mock_request): + self.confluence._put('path') + mock_request.assert_called_with('PUT', f'{self.url}/path') + + @patch('markdown2confluence.api.requests.Session.request') + def test_search(self, mock_request): + # Since search is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.search('cql') + + @patch('markdown2confluence.api.requests.Session.request') + def test_create_page(self, mock_request): + # Since create_page is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.create_page( + space='SPACE', title='Title', body='Body', parent_id=None + ) + + @patch('markdown2confluence.api.requests.Session.request') + def test_update_page(self, mock_request): + # Since update_page is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.update_page(page_id='123', title='Title', body='Body', + version=None) + + 
@patch('markdown2confluence.api.requests.Session.request') + def test_remove_page(self, mock_request): + # Since remove_page is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.remove_page(page_id=None) + + @patch('markdown2confluence.api.requests.Session.request') + def test_create_attachment(self, mock_request): + # Since create_attachment is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.create_attachment( + page_id='123', file_path='file.txt', comment=None + ) + + @patch('markdown2confluence.api.requests.Session.request') + def test_get_attachments(self, mock_request): + # Since get_attachments is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.get_attachments(page_id=None) + + @patch('markdown2confluence.api.requests.Session.request') + def test_update_attachment(self, mock_request): + # Since update_attachment is not implemented, we test for a NotImplementedError + with self.assertRaises(NotImplementedError): + self.confluence.update_attachment( + attachment_id='123', file_path='file.txt', comment=None + ) + + +if __name__ == '__main__': + unittest.main() + diff --git a/tests/unit/test_content_tree.py b/tests/unit/test_content_tree.py new file mode 100644 index 0000000..c660c5b --- /dev/null +++ b/tests/unit/test_content_tree.py @@ -0,0 +1,83 @@ +import unittest +from markdown2confluence.content_tree import ContentNode, ContentTree + + +class TestContentNode(unittest.TestCase): + def setUp(self): + self.root = ContentNode(name='root') + self.child1 = ContentNode(name='child1', parent=self.root) + self.child2 = ContentNode(name='child2', parent=self.root) + self.root.add_child(self.child1) + self.root.add_child(self.child2) + + def test_add_child(self): + self.assertIn('child1', self.root.children) + self.assertIn('child2', self.root.children) + + def 
test_get_child(self): + self.assertEqual(self.root.get_child('child1'), self.child1) + self.assertIsNone(self.root.get_child('nonexistent')) + + def test_is_leaf(self): + self.assertFalse(self.root.is_leaf()) + self.assertTrue(self.child1.is_leaf()) + + def test_is_root(self): + self.assertTrue(self.root.is_root()) + self.assertFalse(self.child1.is_root()) + + def test_str(self): + expected = "'root'\n\t'child1'\n\t'child2'\n" + self.assertEqual(str(self.root), expected) + + +class TestContentTree(unittest.TestCase): + def setUp(self): + self.tree = ContentTree() + + def test_add_node(self): + self.tree.add_node(['level1', 'level2'], content='test content') + node = self.tree.find_node(['level1', 'level2']) + self.assertIsNotNone(node) + self.assertEqual(node.content, 'test content') + + def test_find_node(self): + self.tree.add_node(['level1', 'level2a']) + self.tree.add_node(['level1', 'level2b']) + node_a = self.tree.find_node(['level1', 'level2a']) + node_b = self.tree.find_node(['level1', 'level2b']) + self.assertIsNotNone(node_a) + self.assertIsNotNone(node_b) + self.assertIsNone(self.tree.find_node(['nonexistent'])) + + def test_tree_str(self): + self.tree.add_node(['level1', 'level2'], content='test content') + expected = "'root'\n\t'level1'\n\t\t'level2'\n" + self.assertEqual(str(self.tree), expected) + + def test_add_node_nested(self): + self.tree.add_node(['level1', 'level2', 'level3'], content='nested') + node = self.tree.find_node(['level1', 'level2', 'level3']) + self.assertIsNotNone(node) + self.assertEqual(node.content, 'nested') + + def test_add_node_empty_parent(self): + with self.assertRaises(ValueError): + self.tree.add_node(['', 'level1']) + + def test_find_node_invalid(self): + self.assertIsNone(self.tree.find_node(['level1', 'nonexistent'])) + + def test_multiple_children(self): + self.tree.add_node(['level1', 'child1']) + self.tree.add_node(['level1', 'child2']) + node1 = self.tree.find_node(['level1', 'child1']) + node2 = 
self.tree.find_node(['level1', 'child2']) + self.assertIsNotNone(node1) + self.assertIsNotNone(node2) + + def test_add_same_node_twice(self): + self.tree.add_node(['level1', 'level2'], content='first') + self.tree.add_node(['level1', 'level2'], content='second') + node = self.tree.find_node(['level1', 'level2']) + self.assertEqual(node.content, 'second') diff --git a/tests/unit/test_converter.py b/tests/unit/test_converter.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py new file mode 100644 index 0000000..3ea0df0 --- /dev/null +++ b/tests/unit/test_parser.py @@ -0,0 +1,238 @@ +import unittest +from unittest import mock +from markdown2confluence.parser import MarkdownParser + + +class TestMarkdownParser(unittest.TestCase): + + def setUp(self): + self.parser = MarkdownParser() + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_invalid_path( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.side_effect = FileNotFoundError + with self.assertRaises(FileNotFoundError): + self.parser.parse_directory('non_existing_directory') + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_empty( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter([]) + tree = self.parser.parse_directory('/emptydir') + self.assertEqual(tree.root.name, 'root') + self.assertEqual(tree.root.children, {}) + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_with_non_markdown_files( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + 
mock_walk.return_value = iter( + [('/dir_with_non_md_files', [], ['test.txt'])]) + tree = self.parser.parse_directory('/dir_with_non_md_files') + self.assertEqual(tree.root.children, {}) + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_with_nested_structure( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter([ + ('/nesteddir', ['nested'], []), + ('/nesteddir/nested', [], ['test.md']) + ]) + tree = self.parser.parse_directory('/nesteddir') + self.assertIn('nested', tree.root.children) + self.assertIn('test.md', tree.root.children['nested'].children) + + @mock.patch('markdown2confluence.parser.MarkdownParser._read_file_content') + @mock.patch('os.walk') + def test_parse_directory_with_multiple_markdown_files( + self, mock_walk, mock_read_content): + mock_read_content.side_effect = lambda file_path: \ + "Content of " + file_path + mock_walk.return_value = iter([ + ('/dir_with_multiple_md', [], ['test1.md', 'test2.md']) + ]) + tree = self.parser.parse_directory('/dir_with_multiple_md') + self.assertIn('test1.md', tree.root.children) + self.assertIn('test2.md', tree.root.children) + self.assertEqual( + tree.root.children['test1.md'].content, + "Content of /dir_with_multiple_md/test1.md") + self.assertEqual( + tree.root.children['test2.md'].content, + "Content of /dir_with_multiple_md/test2.md") + + def test_get_media_references_no_media(self): + markdown = """ + # Title + This is a test markdown without any media links. + """ + result = self.parser._get_media_references(markdown) + self.assertEqual(result, []) + + def test_get_media_references_with_local_media(self): + markdown = """ + # Title + ![Alt text](image1.png) + Some text. 
+ ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['image1.png', 'folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_mixed_links(self): + markdown = """ + # Title + ![Alt text](image1.png) + Some text. + ![Alt text](http://example.com/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['image1.png'] + self.assertEqual(result, expected) + + def test_get_media_references_with_relative_links(self): + markdown = """ + # Title + ![Alt text](./relative/image1.png) + Some text. + ![Alt text](../parent/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['./relative/image1.png', '../parent/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_nested_links(self): + markdown = """ + # Title + ![Alt text](folder/subfolder/image1.png) + Some text. + ![Alt text](folder/subfolder/deep/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/subfolder/image1.png', + 'folder/subfolder/deep/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_empty_url(self): + markdown = """ + # Title + ![Alt text]() + Some text. + ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_broken_markdown(self): + markdown = """ + # Title + ![Alt text](image1.png + Some text. + ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_html_image_tag(self): + markdown = """ + # Title + Alt text + Some text. 
+ ![Alt text](folder/image2.jpg) + """ + result = self.parser._get_media_references(markdown) + expected = ['folder/image2.jpg'] + self.assertEqual(result, expected) + + def test_get_media_references_with_special_chars(self): + markdown = """ + # Title + ![Alt text](image_1.png) + Some text. + ![Alt text](folder/image-2.jpg) + ![Alt text](folder/image.3.gif) + ![Alt text](folder/image@4.bmp) + ![Alt text](folder/image#5.jpeg) + ![Alt text](folder/image$6.png) + ![Alt text](folder/image&7.jpg) + ![Alt text](folder/image(8).gif) + ![Alt text](folder/image)9.bmp) + ![Alt text](folder/image+10.jpeg) + ![Alt text](folder/image,11.png) + ![Alt text](folder/image;12.jpg) + ![Alt text](folder/image=13.gif) + ![Alt text](folder/image[14].bmp) + ![Alt text](folder/image]15.jpeg) + ![Alt text](folder/image{16}.png) + ![Alt text](folder/image}17.jpg) + ![Alt text](folder/image~18.gif) + ![Alt text](folder/image!19.bmp) + ![Alt text](folder/image%20.jpeg) + """ + result = self.parser._get_media_references(markdown) + expected = [ + 'image_1.png', 'folder/image-2.jpg', 'folder/image.3.gif', + 'folder/image@4.bmp', 'folder/image#5.jpeg', 'folder/image$6.png', + 'folder/image&7.jpg', 'folder/image(8).gif', 'folder/image)9.bmp', + 'folder/image+10.jpeg', 'folder/image,11.png', 'folder/image;12.jpg', + 'folder/image=13.gif', 'folder/image[14].bmp', 'folder/image]15.jpeg', + 'folder/image{16}.png', 'folder/image}17.jpg', 'folder/image~18.gif', + 'folder/image!19.bmp', 'folder/image%20.jpeg' + ] + self.assertEqual(result, expected) + + def test_get_relative_path_as_list(self): + base_directory = '/home/user/project' + file_path = '/home/user/project/docs/file.md' + expected = ['docs', 'file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + file_path = '/home/user/project/docs/subdir/file.md' + expected = ['docs', 'subdir', 'file.md'] + result = self.parser._get_relative_path_as_list( + file_path, 
base_directory) + self.assertEqual(result, expected) + + file_path = '/home/user/project/file.md' + expected = ['file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + file_path = '/home/user/another_project/file.md' + expected = ['..', 'another_project', 'file.md'] + result = self.parser._get_relative_path_as_list( + file_path, base_directory) + self.assertEqual(result, expected) + + @mock.patch('os.path.exists') + @mock.patch('builtins.open', new_callable=mock.mock_open, read_data='test content') + def test_read_file_content_success(self, mock_open, mock_exists): + mock_exists.return_value = True + result = self.parser._read_file_content('/path/to/file.md') + mock_open.assert_called_once_with( + '/path/to/file.md', 'r', encoding='utf-8') + self.assertEqual(result, 'test content') + + @mock.patch('os.path.exists') + def test_read_file_content_file_not_found(self, mock_exists): + mock_exists.return_value = False + with self.assertRaises(FileNotFoundError): + self.parser._read_file_content('/path/to/nonexistent.md') diff --git a/tests/unit/test_publisher.py b/tests/unit/test_publisher.py index e69de29..18a2e75 100644 --- a/tests/unit/test_publisher.py +++ b/tests/unit/test_publisher.py @@ -0,0 +1,117 @@ +import unittest +from unittest import mock +from markdown2confluence.publisher import Publisher +from markdown2confluence.config import Config +from markdown2confluence.content_tree import ContentTree, ContentNode + + +# Create a mock publisher inheriting from Publisher to test abstract methods +class MockPublisher(Publisher): + def publish_node(self, node, parent_id): + return '' + + +class TestPublisher(unittest.TestCase): + def setUp(self): + self.publisher = MockPublisher(config=Config.__new__(Config)) + self.publisher.config.confluence_root_page = None + self.publisher.config.confluence_parent_page_id = 'mock_parent_id' + + self.mock_publish_node = mock.patch.object( + MockPublisher, 
'publish_node', autospec=True + ).start() + self.mock_publish_node.side_effect = ( + lambda __self__, node, __parent_id__: f"mock_id_for_{node.name}" + ) + self.addCleanup(mock.patch.stopall) + + def test_publish_nested(self): + self.publisher.config.confluence_root_page = '' + + # Create a simple content tree + root = ContentNode(name='root') + child1 = ContentNode(name='child1') + child2 = ContentNode(name='child2') + child3 = ContentNode(name='child3') + root.add_child(child1) + root.add_child(child2) + child2.add_child(child3) + content_tree = ContentTree(root=root) + + # Call publish_content + self.publisher.publish_content(content_tree) + + self.assertEqual(self.mock_publish_node.call_count, 3) + self.mock_publish_node.assert_any_call( + self.publisher, child1, None + ) + self.mock_publish_node.assert_any_call( + self.publisher, child2, None + ) + self.mock_publish_node.assert_any_call( + self.publisher, child3, 'mock_id_for_child2' + ) + + def test_publish_root_page(self): + self.publisher.config.confluence_root_page = 'rootpagename' + + # Create a simple content tree + root = ContentNode(name='root') + child1 = ContentNode(name='child1') + child2 = ContentNode(name='child2') + root.add_child(child1) + root.add_child(child2) + content_tree = ContentTree(root=root) + + # Call publish_content + self.publisher.publish_content(content_tree) + + self.assertEqual(self.mock_publish_node.call_count, 3) + self.mock_publish_node.assert_any_call( + self.publisher, root, 'mock_parent_id' + ) + self.mock_publish_node.assert_any_call( + self.publisher, child1, 'mock_id_for_rootpagename' + ) + self.mock_publish_node.assert_any_call( + self.publisher, child2, 'mock_id_for_rootpagename' + ) + + def test_publish_with_circular_reference(self): + # Create nodes with circular references + node_a = ContentNode(name='A') + node_b = ContentNode(name='B') + node_a.add_child(node_b) + node_b.add_child(node_a) + content_tree = ContentTree(root=node_a) + + with 
self.assertRaises(RuntimeError): + self.publisher.publish_content(content_tree) + + def test_publish_with_missing_root(self): + # Create a content tree without a root + content_tree = ContentTree(root=None) + + with self.assertRaises(AttributeError): + self.publisher.publish_content(content_tree) + + def test_publish_with_none_node(self): + # Create a content tree with a None node + content_tree = ContentTree(root=None) + + with self.assertRaises(AttributeError): + self.publisher.publish_content(content_tree) + + def test_pre_post_hooks_called(self): + self.publisher.pre_publish_hook = mock.MagicMock() + self.publisher.post_publish_hook = mock.MagicMock() + + # Create a simple content tree + root = ContentNode(name='root') + content_tree = ContentTree(root=root) + + # Call publish_content + self.publisher.publish_content(content_tree) + + self.publisher.pre_publish_hook.assert_called_once() + self.publisher.post_publish_hook.assert_called_once()