diff --git a/.gitignore b/.gitignore index 7ba61e6..588fa4f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,13 @@ -*.csv -.vscode -q_and_a \ No newline at end of file +/* +**/*.egg-info/ +**/__pycache__/ +!assets/ +!resources/ +!src/ +!tests/ +!.gitignore +!LICENSE +!pyproject.toml +!README.md +!release.ps1 +!requirements.txt diff --git a/LICENSE b/LICENSE index a732d9e..0d5f011 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ MIT License +Copyright (c) 2024 Wei Cheng + Copyright (c) 2024 Mahmoud Abdelkhalek Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/README.md b/README.md index 5063840..b32e3d8 100644 --- a/README.md +++ b/README.md @@ -1,208 +1,295 @@ # Stack Exchange Backup -| | | -| --- | --- | -| | | - -Download all of your questions and answers as Markdown files from all Stack Exchange (SE) sites using a Python script and the [SE API](https://api.stackexchange.com/). - -# Getting started - -To get started, make sure that Python version 3.11.7 or newer is installed. Then, run the -following commands (note that these commands were tested in Powershell, and so only one of these commands will need to be translated for use in bash, zsh, etc.): -```bash -git clone https://github.com/mhdadk/stack-exchange-backup.git -cd stack-exchange-backup -# create a virtual environment -python -m venv .venv -# activate the virtual environment. This command should be translated when using bash -.venv\Scripts\activate -python -m pip install "requests==2.31.0" -``` -Finally, determine your SE network user ID by doing the following: +> [!CAUTION] +> Stack Exchange Backup is intended as a backup tool for your own personal writings on the Stack Exchange network sites +> in the form of questions and answers. +> It is currently alpha software, so nothing is set in stone yet. 
+> If you publish the files created with this script, +> you are fully responsible for complying with the terms of the content licenses, +> as the attribution data may be incorrect or incomplete. +> Due to technical difficulties, some user content may be missing from the backup. +> Please refer to the [omissions section](#omissions) for additional details. -1. Go to https://stackexchange.com/. -2. Log in to your SE account. -3. Click on your profile picture on the top-right, as indicated by the red arrow in the screenshot below. -![](assets/se_click.png) -4. Go to the address bar in your browser, and the address should be in the form `https://stackexchange.com/users//`. For example, the screenshot below shows my `user id` as `9073934` and my `user name` as `mhdadk`. Note/save the `user id` from the address bar that is shown for your account, as this will be needed later. -![](assets/address_bar_userid.png) +> [!NOTE] +> This software is NOT an official product of, nor is it affiliated with, endorsed by, or sponsored by, +> Stack Exchange, Inc. -Go to the "Usage" section below for the final step. +## Showcase -# Usage +![Program run demo](assets/demo.avif) +![Example download file](assets/markdown.png) -Once the steps under the section "Getting started" above are done, you can then download all of your questions and answers from all SE sites by running the following command: -```powershell -python main.py --user_id -``` -where `` should be replaced with your own `user id` that you obtained by following the instructions under the "Getting started" section above. See the "Format" section below for details on the format of the files that are downloaded. - -**NOTE**: you may notice that, for some SE sites, there are fewer answers downloaded -under the `answers` directory than the number of answers shown on the SE site itself -online. The reason for this is that more than one of your answers may be associated with -the same question.
In this case, the question and your multiple answers for it are -downloaded once only. - -# Format - -Once the command under the "Usage" section is run, a `q_and_a` directory will be created inside the directory from which the command was run. This directory will have the following structure: -```bash -.com -|--- questions -|---|--- .md -|---|--- .md -|---|--- ... -|--- answers -|---|--- .md -|---|--- .md -|---|--- ... -.com -|--- questions -|---|--- .md -|---|--- .md -|---|--- ... -|--- answers -|---|--- .md -|---|--- .md -|---|--- ... -... -``` -where -* `` is the name for the `n`th SE site associated with a user. -* `` is the question ID associated with `n`th question for the parent SE site. -* `` is the question ID associated with the `n`th answer for the parent SE site. +## Installation + +1. Either download the repository as a ZIP file and extract it, + or [install Git](https://git-scm.com/downloads) (recommended) and do a `git clone` of the project. + + ```shell + git clone https://github.com/9ao9ai9ar/stack-exchange-backup.git + ``` + +2. [Install Python 3.12 or newer](https://www.python.org/downloads/). + See the [support section](#support) for additional information. + +3. Enter the directory you just extracted/cloned. -The `q_and_a` directory will contain Markdown files with the extension `.md`. Each Markdown -file will represent either a question or an answer, depending on whether it is under a -`questions` directory or an `answers` directory. If the Markdown file represents a -question, then the question creator will be you. Otherwise, if the Markdown file -represents an answer, the question creator will not be you, but the creator of one of -the answers included in the Markdown file will be you. More specifically, each Markdown -file will have the following format (text that is inside angle brackets, such as ``, -represents text that will vary for each Markdown file): -```markdown -Question downloaded from -Question asked by on at . 
-Number of up votes: -Number of down votes: -Score: -# - + ```shell + cd stack-exchange-backup + ``` -### Comment 1 -Comment made by on at . -Comment score: + All steps hereafter assume operations under said directory. - +4. Create and activate a virtual environment (strongly recommended). -... + * Windows: -### Comment n -Comment made by on at . -Comment score: + ```shell + py -3 -m venv .venv + .\.venv\Scripts\activate + ``` - + * macOS/Linux: -## Answer 1 -Answer by on at . -This the accepted answer. -Number of up votes: -Number of down votes: -Score: + ```shell + python3 -m venv .venv + . .venv/bin/activate + ``` - +5. Install `stack-exchange-backup` as a local Python package. -### Comment 1 -Comment made by on at . -Comment score: + ```shell + python -m pip install . + ``` - + If the script fails to run due to changing dependencies, you may install the last known working versions. + + ```shell + python -m pip install -r ./requirements.txt + ``` + +## Usage + +Remember to activate the virtual environment first! + +```console +(.venv) $ python -m stackexchange.backup --help +usage: backup.py [-h] --account-id ACCOUNT_ID [--out-dir OUT_DIR] [--format {markdown,json}] [--no-meta] + [--clean] [--api-key API_KEY] [--limit-rate LIMIT_RATE] + +options: + -h, --help show this help message and exit + --account-id ACCOUNT_ID + user account ID on stackexchange.com + --out-dir OUT_DIR output directory (defaults to the current working directory) + --format {markdown,json} + output file format (default: markdown) + --no-meta do not back up posts on meta sites + --clean remove files from the stack_user_id subdirectory before back up + --api-key API_KEY API key (for debugging only) + --limit-rate LIMIT_RATE + maximum request rate in requests per second within the integer range of 1 and 30 + inclusive (default: 10) +``` -... +* `ACCOUNT_ID`: the ID of the Stack Exchange network account whose posts you want to back up. + Note that this is different from the per-site user IDs. 
+ To acquire the `ACCOUNT_ID` of a user: -### Comment n -Comment made by on at . -Comment score: + 1. Go to the user's profile page on one of the Stack Exchange network sites + and click on either the *View all* link next to *Communities* + or the *Network profile* link in the dropdown under *Profiles*. - + ![Jeff Atwood's Stack Overflow user profile page](assets/network_user.png) -... + 2. On the new web page that is just opened, note the URL segment after `users` consists of a number: + this is the `ACCOUNT_ID` of the user (1 in the case of Jeff Atwood). -## Answer m -Answer by on at . -This the accepted answer. -Number of up votes: -Number of down votes: -Score: + ![Jeff Atwood's Stack Exchange account page](assets/account_id.png) - +* `OUT_DIR`: the folder to download your files to. -### Comment 1 -Comment made by on at . -Comment score: +* `API_KEY`: a token that grants an increased query quota. + A default API key is included and used automatically in the script. + To access the API without using a key, assign an empty string as the value to this option. - +* `LIMIT_RATE`: a soft limit imposed on the running program. + It is [stated](https://api.stackexchange.com/docs/throttle) in no uncertain terms that + the Stack Exchange API considers 30+ requests per second per IP to be very abusive, + and will thus ban any rogue IP from making further requests to it for a period of time, typically within a few minutes. -... +## Output -### Comment n -Comment made by on at . -Comment score: +### Directory Layout - +The output directory layout is mostly a replication of the short form structures of +the [Stack Exchange engine URLs](https://meta.stackexchange.com/q/332237) with minor differences. + +```txt +stack_user_/ + / + a/ + / + index.md + .md + .md + ... + ... + q/ + / + index.md + .md + ... + ... + ... 
+``` + +### File Layout + +The default Markdown output file layout contains a YAML front matter block, +which is a way to add metadata to generated web pages in many static site generators. + +```yaml +--- +title: str # questions only +tags: # questions only +- str +view_count: int # questions only +is_accepted: bool # answers only +awarded_bounty_amount: int # answers only +score: int +up_vote_count: int +down_vote_count: int +owner: + display_name: str + user_type: str + reputation: int + link: str +creation_date: str +last_edit_date: str +community_owned_date: str +content_license: str +share_link: str +comments: +- score: int + creation_date: str + content_license: str + link: str + owner: + display_name: str + user_type: str + reputation: int + link: str + body_markdown: str +--- +{{ post.body_markdown }} ``` -See the "Logic" section below for an overview of how the `main.py` file works. +### Omissions + +| Items | Reason | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Deleted posts | [The API does not provide a way to retrieve deleted posts](https://stackapps.com/q/1917), even when authenticated. | +| (Some) community wiki posts | The API does not seem to provide an easy or reliable way to retrieve community wikis of which a user is a co-author but not the original poster. The authorships of community wikis are also difficult to programmatically determine and be given proper attributions. 
Additional reading: *[What are "Community Wiki" posts?](https://meta.stackexchange.com/q/11740)* | +| (Some) migrated posts | A migrated post can not be permanently linked back to the owner until they register for an account on the target site and associate it to their network profile. Additional reading: *[What is migration and how does it work?](https://meta.stackexchange.com/q/10249)* | +| Answers to merged questions | In this rather rare occurrence, all of the merged question's answers become answers to the target question. Although the combined answers to the target question can be retrieved, it may be confusing to include them as they may quote from the target question and have an accepted status that the owner of the merged question might not agree with. The inclusion of this category of items may be revisited in the future. Additional reading: *[What is a "merged" question?](https://meta.stackexchange.com/q/158066)* | +| Area 51 posts | [Area 51 Discussions is not adequately supported in the API](https://stackapps.com/q/8726), and few people participated on this site. | +| Articles | Being a part of collectives, articles have only been rolled out to Stack Overflow, and fewer than 200 articles have been [published](https://stackoverflow.com/collectives/articles) to date since its inception in 2021. Therefore, I have concluded it is not worth the effort to add support for backing up articles, despite them still being queryable through the [`/users/{ids}/posts`](https://api.stackexchange.com/docs/posts-on-users) endpoint after [`/articles` has been removed from the API](https://stackapps.com/q/10456). | +| Saves | When public favorites, also briefly known as bookmarks, got reworked into private saves, it was done without coordinated changes to the API, so [it became impossible to query a user's saves through the API](https://meta.stackexchange.com/q/382991). 
| + +## Related Projects + +### [Stack Exchange API](https://api.stackexchange.com/) + +As one of the three official gateways to the public data on the Stack Exchange network, +the API is the most conducive to application development, but is also mired in bugs and limitations. +Therefore, it might be a good idea to cross-check or complement the API data with data obtained through other means. + +#### [mhdadk/stack-exchange-backup](https://github.com/mhdadk/stack-exchange-backup) + +The original repository from which this fork is derived. +I would like to express my thanks to its author, Mahmoud Abdelkhalek, +for his well-commented code expedited my process of grokking the Stack Exchange API, which, +while conceptually simple, has its documentation of related topics, +some insufficiently explained, and the numerous bugs scattered all over the place. + +#### [StackExchangeBackupLaravel](https://github.com/ryancwalsh/StackExchangeBackupLaravelPHP) + +StackExchangeBackupLaravel allows exporting a somewhat complete data footprint of a user on the Stack Exchange network. +The user contents are saved in JSON and uploaded to Amazon S3 by default. + +### [Stack Exchange Data Explorer](https://data.stackexchange.com/) -# Logic +The Stack Exchange Data Explorer (SEDE) is an open source tool +for running arbitrary queries against public data from the Stack Exchange network. +There are ready-made queries to export your data to a +[single HTML file](https://data.stackexchange.com/meta.stackexchange/query/758326) +or [CSV file](https://data.stackexchange.com/meta.stackexchange/query/1529864), +but the underlying data are only [refreshed weekly](https://data.stackexchange.com/help#faq), +as opposed to the data returned by the API, which are [refreshed about once a minute](https://stackapps.com/q/3543). -This section is intended for anyone interested in how the `main.py` file works, and is -optional reading. 
The following steps are added as comments (as `#%% step X`) inside -the `main.py` file to indicate which part of the file corresponds to which step below. +#### [Pippim Website](https://www.pippim.com/programs/stack.html) -The `main.py` script proceeds as follows: -1. Given a network user ID, obtain the names of all the SE sites associated with this -user ID and the corresponding site ID associated with each site. -2. Create the top-level directory `q_and_a`. -3. For each SE site obtained in step 1: +A demo website that comes with a set of procedures and programs to help +convert your Stack Exchange posts into a fancy GitHub Pages website. - (a) Create the `questions` directory for this SE site. +### [Stack Exchange Data Dump](https://stackoverflow.com/help/data-dumps) - (b) Get all questions associated with this user on this SE site. +The quarterly dump of all user-contributed data on the Stack Exchange network. +According to an [announcement](https://meta.stackexchange.com/q/401324) made in July 2024, +the data dumps will no longer be [uploaded](https://archive.org/details/stackexchange) to the Internet Archive; +instead, they will be provided from a section in the site user profile settings. +Therefore, this method of backup has a few major downsides: - (c) For each question associated with this user on this SE site, write the contents - of the question, its comments, the answers, and their comments into a Markdown file - using the format mentioned in the "Format" section above. +1. Being locked behind a login wall. +2. Being incomplete, meaning the data dump you download + is only for the specific site from which you initiated the request. +3. Being complete, meaning the download size may be humongous, and to get only your data, + you would have to do some non-trivial parsing of the downloaded XML files yourself. - (d) Create the `answers` directory for this SE site.
+#### [Stack Exchange data dump downloader and transformer](https://github.com/LunarWatcher/se-data-dump-transformer) - (e) Get all answers associated with this user on this SE site. +Thankfully, this project exists to address some of the above pain points. - (f) For each answer associated with this user on this SE site, get the ID of the - question associated with the answer. +## Development - (g) For each question ID obtained in step 3(f), get the corresponding question, and - then write the contents of the question, its comments, the answers, including yours, - and their comments into a Markdown file using the format mentioned in the - "Format" section above. +My personal development process for this project is encoded in [`release.ps1`](./release.ps1), +a polyglot script that is valid in both the POSIX shell and PowerShell. +In addition to the dependencies specified in [`pyproject.toml`](./pyproject.toml), the script relies on the following utilities: -# Alternatives +* [uv](https://github.com/astral-sh/uv) +* [security-constraints](https://github.com/mam-dev/security-constraints) +* [Pyright](https://github.com/microsoft/pyright) -There are alternative ways of downloading all your questions and answers from each SE -site: +which need to be installed and configured separately as instructed in the comments therein. -## `stackapi` +To help you in your experimentation with the Stack Exchange API through the documentation web pages, +I have compiled a list of the parameter types and their associated icons as follows: -There exists a Python API for the SE API called [`stackapi`](https://github.com/AWegnerGitHub/stackapi) -that is built on top of the `requests` package. Although this API provides a nice -interface to the SE API, my goal here was to use as few dependencies as possible to -lower the risk of obscelesence later on. 
+* ![string-type](https://cdn.sstatic.net/apiv2/img/text.png) Strings +* ![number-type](https://cdn.sstatic.net/apiv2/img/number.png) + [Numbers](https://api.stackexchange.com/docs/numbers) +* ![date-type](https://cdn.sstatic.net/apiv2/img/calendar.png) + [Dates](https://api.stackexchange.com/docs/dates) +* ![list-type](https://cdn.sstatic.net/apiv2/img/list.png) + [Lists](https://api.stackexchange.com/docs/vectors) +* ![key-type](https://cdn.sstatic.net/apiv2/img/key.png) + [Keys](https://api.stackexchange.com/docs/authentication) +* ![access-token-type](https://cdn.sstatic.net/apiv2/img/access-token.png) + [Access Tokens](https://api.stackexchange.com/docs/authentication) -## Stack Exchange data explorer +Except for numbers and dates, the icons are not explained anywhere in the documentation, +but if you open the inspector in your web browser, +say when you are on [this page](https://api.stackexchange.com/docs/edit-question), +and check the `` nodes enclosing the icons you are interested in learning about, +you will find that the parameter types are named in the `class` attributes, as `string-type`, `number-type`, etc. -The [SE data explorer](https://data.stackexchange.com/) provides another way of obtaining -a copy of all your questions and answers across all SE sites, via [this query](https://data.stackexchange.com/stackoverflow/query/1811712/all-my-posts-on-the-se-network-with-markdown-and-html-content-plus-editors-and-s) for example. However, -this query only returns a CSV file, from which the relevant content will need to be -parsed and then written to Markdown files. Additionally, I am personally not familiar -with SQL, so I preferred the approach used in the `main.py` file. +## Support + +It is my policy to strive to support, within reason, +all [non-end-of-life, stable releases](https://devguide.python.org/versions/#status-key) of Python, +as well as all prominent, up-to-date Python implementations, namely CPython, PyPy and GraalPy. 
+If you are a Windows or macOS user, do note that official binaries are not provided for the security releases. +Therefore, I encourage you to instead install them from either the `defaults` (recommended) +or the `conda-forge` conda channel, by using one of the +[conda-compatible tools](https://conda.org/blog/2024-08-14-conda-ecosystem-explained/), +to benefit from the continuing security fixes. diff --git a/assets/account_id.png b/assets/account_id.png new file mode 100644 index 0000000..b64a07d Binary files /dev/null and b/assets/account_id.png differ diff --git a/assets/address_bar_userid.png b/assets/address_bar_userid.png deleted file mode 100644 index 92283fe..0000000 Binary files a/assets/address_bar_userid.png and /dev/null differ diff --git a/assets/demo.avif b/assets/demo.avif new file mode 100644 index 0000000..09dc82b Binary files /dev/null and b/assets/demo.avif differ diff --git a/assets/example1.png b/assets/example1.png deleted file mode 100644 index 88d8b0b..0000000 Binary files a/assets/example1.png and /dev/null differ diff --git a/assets/example2.png b/assets/example2.png deleted file mode 100644 index 50b775e..0000000 Binary files a/assets/example2.png and /dev/null differ diff --git a/assets/example3.png b/assets/example3.png deleted file mode 100644 index 0980238..0000000 Binary files a/assets/example3.png and /dev/null differ diff --git a/assets/markdown.png b/assets/markdown.png new file mode 100644 index 0000000..cf30723 Binary files /dev/null and b/assets/markdown.png differ diff --git a/assets/network_user.png b/assets/network_user.png new file mode 100644 index 0000000..d86f609 Binary files /dev/null and b/assets/network_user.png differ diff --git a/assets/se_backup.gif b/assets/se_backup.gif deleted file mode 100644 index 3b539ee..0000000 Binary files a/assets/se_backup.gif and /dev/null differ diff --git a/assets/se_click.png b/assets/se_click.png deleted file mode 100644 index 140ab06..0000000 Binary files a/assets/se_click.png and
/dev/null differ diff --git a/main.py b/main.py deleted file mode 100644 index eb89305..0000000 --- a/main.py +++ /dev/null @@ -1,403 +0,0 @@ -import requests -import argparse -import pathlib -import datetime -import html -import time - -# parse command-line arguments -parser = argparse.ArgumentParser() -parser.add_argument("--user_id", - help='User network ID', - required=True, - type=str) -args = parser.parse_args() - -# need this Stack API key for a higher request quota per day. -# See also https://api.stackexchange.com/docs/authentication for details -api_key = "YLTVFmHkeJbm7ZIOoXstag((" - -# this must appear before every request -base_url = "https://api.stackexchange.com/2.3/" - -#%% step 1 -""" -Filters are useful for saving bandwidth and getting only the fields that you need in the -response. See https://api.stackexchange.com/docs/filters for details. - -There is no need for the "api_site_parameter" name if you have the site URL. See -https://api.stackexchange.com/docs, where it states: -> Each of these methods operates on a single site at a time, identified by the site -> parameter. This parameter can be the full domain name (ie. "stackoverflow.com"), or a -> short form identified by api_site_parameter on the site object. - -Need the ".backoff" parameter to detect throttling. See https://api.stackexchange.com/docs/throttle -for details. -""" -r = requests.get(base_url + f"filters/create", - params={"key":api_key, - "include":".items;"\ - ".has_more;"\ - ".page;"\ - ".page_size;"\ - ".quota_max;"\ - ".quota_remaining;"\ - ".backoff;"\ - "network_user.site_url;"\ - "network_user.user_id", - "base":"none", - "unsafe":"false"}) -network_users_filter = r.json()['items'][0]['filter'] - -""" -According to https://api.stackexchange.com/docs/paging, each response will -have a maximum number of 100 items (the "pagesize" parameter) under the "items" -field. 
Therefore, if there are more than 100 questions/answers/comment/etc., we will -need to process the first 100 questions, request the next 100 questions, process those, -and so on, until the "has_more" property is set to "False". To request the next 100 -questions, we will need to set the "page" property under the ".wrapper" category to "2", -where this property was set to "1" for the first page. See -https://api.stackexchange.com/docs/wrapper for details. Keep this in mind when -making any request. -""" -has_more = True -page_num = 0 -site_names = [] -user_ids = [] -while has_more: - if has_more: - page_num += 1 - # note that the "page" parameter must start at 1. See - # https://api.stackexchange.com/docs/paging. Also, the "types" parameter is used to - # get both the main sites (e.g. math.stackexchange.com) and their corresponding - # meta sites (e.g. math.meta.stackexchange.com) - r = requests.get(base_url + f"users/{args.user_id}/associated", - params={"key":api_key, - "filter":network_users_filter, - "page":str(page_num), - "pagesize":"100", - "types":"main_site;meta_site"}) - data = r.json() - # has_more will be set to False if there are no more pages to request, which - # breaks us out of this loop - has_more = data['has_more'] - # extract the site names and their corresponding user ids - for item in data['items']: - user_ids.append(item['user_id']) - site_url = item['site_url'] - # skip the first 8 characters in the site url to get the site name. 
This will be - # used later to query each site - site_names.append(site_url[8:]) - -print(f"Found {len(site_names)} Stack Exchange sites associated with "\ - f"https://stackexchange.com/users/{args.user_id}") - -""" -For questions, create a filter using the "/filters/create" method to get the -following fields from the "question" object type -(https://api.stackexchange.com/docs/types/question): -- answers -- body_markdown -- comments -- creation_date -- down_vote_count -- up_vote_count -- score - -Need the "shallow_user.display_name" field to return the owner associated with -a question or answer, since the return type is "shallow_user". If the owner is not -returned, then this is a community wiki post. - -For some reason, I can't request the "comment.body_markdown" field without also -requesting the "comment.body" field. If I try to do this, I won't get the -"comment.body_markdown" field in the response. -""" -r = requests.get(base_url + f"filters/create", - params={"key":api_key, - "include":".items;"\ - ".has_more;"\ - ".page;"\ - ".page_size;"\ - ".quota_max;"\ - ".quota_remaining;"\ - ".backoff;"\ - "shallow_user.display_name;"\ - "question.answers;"\ - "question.title;"\ - "question.body_markdown;"\ - "question.comments;"\ - "question.creation_date;"\ - "question.down_vote_count;"\ - "question.up_vote_count;"\ - "question.score;"\ - "question.owner;"\ - "question.link;"\ - "question.question_id;"\ - "answer.body_markdown;"\ - "answer.owner;"\ - "answer.comments;"\ - "answer.creation_date;"\ - "answer.is_accepted;"\ - "answer.down_vote_count;"\ - "answer.up_vote_count;"\ - "answer.score;"\ - "comment.body;"\ - "comment.body_markdown;"\ - "comment.creation_date;"\ - "comment.owner;"\ - "comment.score", - "base":"none", - "unsafe":"false"}) -questions_filter = r.json()['items'][0]['filter'] - -""" -For answers, create a filter using the "/filters/create" method to get the -following fields from the "answer" object type 
-(https://api.stackexchange.com/docs/types/answer): -- question_id (to go to the question and download it along with its answers) -""" -r = requests.get(base_url + f"filters/create", - params={"key":api_key, - "include":".items;"\ - ".has_more;"\ - ".page;"\ - ".page_size;"\ - ".quota_max;"\ - ".quota_remaining;"\ - ".backoff;"\ - "answer.question_id", - "base":"none", - "unsafe":"false"}) -answers_filter = r.json()['items'][0]['filter'] - -#%% step 2 -# create the top level directory and do nothing if it already exists -top_level_dir = pathlib.Path("q_and_a") -top_level_dir.mkdir(exist_ok=True) - -def write_question(target_dir,question): - # open the file "questions_dir/.md" to write to it. Note that the - # can be used to contruct the URL for the question as - # https:/// - # Also, we don't use the question title as the file name because the question - # title can contain invalid characters (such as "$" for LaTeX). We use the - # question ID instead. - # we are assuming that question IDs for each site are unique, so there is no - # need to deliberately avoid overwriting - fpath = (target_dir / str(question['question_id'])).with_suffix(".md") - # if the file already exists, then skip it to save time - if fpath.exists(): - return - # see https://stackoverflow.com/a/42495690/13809128 for why "encoding" parameter is - # needed - f = fpath.open(mode="w",encoding="utf-8") - # question metadata - f.write(f"Question downloaded from {question['link']}\\\n") - creation_datetime = datetime.datetime.fromtimestamp(question['creation_date'], - tz=datetime.timezone.utc) - # question may be a community wiki, in which case it has no owner - if "owner" in question: - if "display_name" in question["owner"]: - f.write(f"Question asked by {question['owner']['display_name']} on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Question is community-owned and was asked on "\ - 
f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Question is community-owned and was asked on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - f.write(f"Number of up votes: {question['up_vote_count']}\\\n") - f.write(f"Number of down votes: {question['down_vote_count']}\\\n") - f.write(f"Score: {question['score']}\n") - # question title - f.write(f"# {question['title']}\n") - # question body - # See https://stackoverflow.com/q/2087370/13809128 for why "html.unescape" is - # needed. - f.write(html.unescape(f"{question['body_markdown']}\n")) - # comments to the question - # it is safer to use the ".get" method on the dict "question" because it may - # be the case that the 'comments' field does not exist, such as when there are - # no comments on the question - for i,comment in enumerate(question.get('comments',[])): - f.write(f"### Comment {i+1}\n") - creation_datetime = datetime.datetime.fromtimestamp(comment['creation_date'], - tz=datetime.timezone.utc) - if "owner" in comment: - if "display_name" in comment["owner"]: - f.write(f"Comment made by {comment['owner']['display_name']} on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Comment made anonymously and was asked on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Comment made anonymously and was asked on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - f.write(f"Comment score: {comment['score']}\n\n") - f.write(html.unescape(f"{comment['body_markdown']}\n")) - # answers to the question and the comments on each answer - for i,answer in enumerate(question.get('answers',[])): - f.write(f"## Answer {i+1}\n") - creation_datetime = 
datetime.datetime.fromtimestamp(answer['creation_date'], - tz=datetime.timezone.utc) - if "owner" in answer: - if "display_name" in answer["owner"]: - f.write(f"Answer by {answer['owner']['display_name']} on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Anonymous answer that was created on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Anonymous answer that was created on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - if answer['is_accepted']: - f.write("This is the accepted answer.\\\n") - else: - f.write("This is not the accepted answer.\\\n") - f.write(f"Number of up votes: {answer['up_vote_count']}\\\n") - f.write(f"Number of down votes: {answer['down_vote_count']}\\\n") - f.write(f"Score: {answer['score']}\n\n") - f.write(html.unescape(f"{answer['body_markdown']}\n")) - # comments on the answer - for j,comment in enumerate(answer.get('comments',[])): - f.write(f"### Comment {j+1}\n") - creation_datetime = datetime.datetime.fromtimestamp(comment['creation_date'], - tz=datetime.timezone.utc) - if "owner" in comment: - if "display_name" in comment["owner"]: - f.write(f"Comment made by {comment['owner']['display_name']} on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Comment made anonymously and was asked on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - else: - f.write(f"Comment made anonymously and was asked on "\ - f"{creation_datetime.strftime('%Y-%m-%d')} at "\ - f"{creation_datetime.strftime('%H:%M:%S')} UTC.\\\n") - f.write(f"Comment score: {comment['score']}\n\n") - f.write(html.unescape(f"{comment['body_markdown']}\n")) - # close the file after you are done writing - f.close() - 
-# iterate over the sites -for i,(site_name,user_id) in enumerate(zip(site_names,user_ids)): - print(f"Downloading and writing questions from site "\ - f"{i+1}/{len(site_names)} ({site_name})...",end="",flush=True) - #%% step 3(a) - # create the "questions" directory for this site - questions_dir = top_level_dir / site_name / "questions" - questions_dir.mkdir(parents=True,exist_ok=True) - has_more = True - page_num = 0 - while has_more: - if has_more: - page_num += 1 - #%% step 3(b) - r = requests.get(base_url + f"users/{user_id}/questions", - params={"key":api_key, - "site":site_name, - "filter":questions_filter, - "page":str(page_num), - "pagesize":"100"}) - data = r.json() - has_more = data['has_more'] - questions = data['items'] - #%% step 3(c) - # if there are no questions associated with this site, then "questions" - # will be an empty list, such that the following for loop will be skipped. - for question in questions: - write_question(questions_dir,question) - """ - According to https://api.stackexchange.com/docs/throttle: - - > A dynamic throttle is also in place on a per-method level. If an application - > receives a response with the backoff field set, it must wait that many seconds - > before hitting the same method again. For the purposes of throttling, all /me - > routes are considered to be identical to their /users/{ids} equivalent. Note - > that backoff is set based on a combination of factors, and may not be - > consistently returned for the same arguments to the same method. Additionally, - > all methods (even seemingly trivial ones) may return backoff. - - So, we will need to wait a certain amount of time if the "backoff" parameter is - returned in the response before making another request. - """ - if "backoff" in data: - print("We've made too many requests to the Stack Exchange API, so we will "\ - f"need to wait for {data['backoff']} seconds. 
Please be patient...",flush=True) - time.sleep(data['backoff'] + 1) # add a second just in case - print(f"Downloading and writing the remaining questions from {site_name}...", - end="",flush=True) - print(f"Done.") - print(f"Downloading and writing answers from site "\ - f"{i+1}/{len(site_names)} ({site_name})...",end="",flush=True) - #%% step 3(d) - # create the "answers" directory for this site - answers_dir = top_level_dir / site_name / "answers" - answers_dir.mkdir(parents=True,exist_ok=True) - has_more = True - page_num = 0 - while has_more: - backoff = False - if has_more: - page_num += 1 - #%% step 3(e) - r = requests.get(base_url + f"users/{user_id}/answers", - params={"key":api_key, - "site":site_name, - "filter":answers_filter, - "page":str(page_num), - "pagesize":"100"}) - data = r.json() - has_more = data['has_more'] - answers = data['items'] - # in case there are no answers associated with this site, skip it - if len(answers) == 0: - continue - if "backoff" in data: - backoff = True - backoff_time = data["backoff"] - #%% step 3(f) - question_ids = "" - for i,answer in enumerate(answers): - question_ids += f"{answer['question_id']}" - # don't put a semicolon at the end of the query string as this will throw - # an error - if i < len(answers) - 1: - question_ids += ";" - #%% step 3(g) - # get all the questions associated with these answers. 
Since there will always - # be a maximum of 100 answers, then there will always be 100 questions, and - # so we don't need to iterate through pages here - r = requests.get(base_url + f"questions/{question_ids}", - params={"key":api_key, - "site":site_name, - "filter":questions_filter, - "pagesize":"100"}) - data = r.json() - questions = data['items'] - if "backoff" in data: - if backoff: # if "backoff" is already needed from the previous method - backoff_time = max(data["backoff"],backoff_time) - else: - backoff = True - backoff_time = data["backoff"] - for question in questions: - write_question(answers_dir,question) - if backoff: - print("We've made too many requests to the Stack Exchange API, so we will "\ - f"need to wait for {backoff_time} seconds. Please be patient...",flush=True) - time.sleep(backoff_time + 1) # add a second just in case - print(f"Downloading and writing the remaining answers from {site_name}..." - ,end="",flush=True) - print(f"Done.") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ce995d1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,212 @@ +[build-system] +requires = ["setuptools >= 78.1.1"] +build-backend = "setuptools.build_meta" + +[project] +name = "stack-exchange-backup" +version = "2026.02.25" +description = "Download all your posts on the Stack Exchange network as Markdown files." 
+readme = "./README.md" +requires-python = ">= 3.12" +license = "MIT" +license-files = ["./LICENSE"] +authors = [{ name = "Wei Cheng", email = "weicheng018@gmail.com" }] +keywords = [ + "Stack Exchange", +] +classifiers = [ + "Development Status :: 3 - Alpha", + # "Development Status :: 4 - Beta", + # "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: End Users/Desktop", + "Operating System :: OS Independent", + "Operating System :: Microsoft :: Windows", + # "Operating System :: MacOS :: MacOS X", + "Operating System :: POSIX :: Linux", + # "Operating System :: POSIX :: BSD", + # "Operating System :: POSIX :: SunOS/Solaris", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + # "Programming Language :: Python :: 3.15", + "Programming Language :: Python :: Implementation :: CPython", + # "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: Python :: Implementation :: GraalPy", + "Topic :: System :: Archiving :: Backup", +] +urls.source = "https://github.com/9ao9ai9ar/stack-exchange-backup" +dependencies = [ + "attrs >= 25.4.0", # https://www.attrs.org/en/stable/changelog.html + "cattrs[pyyaml] >= 25.3.0", # https://catt.rs/en/stable/history.html + "requests >= 2.32.4", # https://requests.readthedocs.io/en/latest/community/updates/#release-history + "ruamel.yaml >= 0.18.16", # https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/CHANGES +] + +[dependency-groups] +dev = [ + "datamodel-code-generator >= 0.54.0", # https://github.com/koxudaxi/datamodel-code-generator/releases + "openapi-spec-validator >= 0.8.1", # https://github.com/python-openapi/openapi-spec-validator/releases + "pylint >= 4.0.5, < 5", # https://pylint.readthedocs.io/en/latest/whatsnew/4/index.html + "ruff >= 
0.15.2", # https://github.com/astral-sh/ruff/releases +] +sit = [ + "pytest >= 9.0.0, < 10", # https://docs.pytest.org/en/stable/changelog.html +] +uat = [ + "memory-profiler >= 0.61.0", + "memray >= 1.19.1 ; sys_platform == 'linux' or sys_platform == 'darwin'", # https://bloomberg.github.io/memray/changelog.html +] +prod = [ + "bumpver >= 2025.1131", # https://github.com/mbarkhau/bumpver/blob/master/CHANGELOG.md + "validate-pyproject[all] >= 0.25", # https://validate-pyproject.readthedocs.io/en/latest/changelog.html +] +all = [ + { include-group = "dev" }, + { include-group = "sit" }, + { include-group = "uat" }, + { include-group = "prod" }, +] + +# https://github.com/mbarkhau/bumpver?tab=readme-ov-file#configuration +[tool.bumpver] +current_version = "2026.02.25" +version_pattern = "YYYY.0M.0D[-PATCH]" +commit = false + +[tool.bumpver.file_patterns] +"pyproject.toml" = [ + '^current_version = "{version}"', + '^version = "{version}"', +] + +# https://koxudaxi.github.io/datamodel-code-generator/cli-reference/quick-reference/ +[tool.datamodel-codegen] +additional-imports = "attr.dataclass" +formatters = [ + "isort", + "ruff-check", + "ruff-format", +] +input = "./resources/openapi/openapi.yaml" +input-file-type = "openapi" +output = "./src/stackexchange/generated/_model_openapi.py" +output-model-type = "dataclasses.dataclass" +## Typing customization: +enum-field-as-literal = "all" +## Field customization: +use-field-description = true +## Model customization: +collapse-root-models = true +disable-timestamp = true +keep-model-order = true +keyword-only = true +use-exact-imports = true +use-schema-description = true +## Template customization: +encoding = "utf-8" +use-double-quotes = true +## OpenAPI-only options: +openapi-scopes = [ + "schemas", + "parameters", + "paths", +] +use-operation-id-as-name = true + +# https://pylint.readthedocs.io/en/latest/user_guide/messages/messages_overview.html +[tool.pylint.main] +disable = [ + "missing-class-docstring", + 
"missing-function-docstring", + "missing-module-docstring", + "no-else-return", + "unused-argument", + "unused-wildcard-import", + "wildcard-import", +] +enable = [ + "useless-suppression", +] +fail-under = 9 +ignore-paths = [ + ".*/generated", +] +jobs = 0 +load-plugins = [ + "pylint.extensions.code_style", +] +output-format = "colorized" + +[tool.pylint.design] +max-args = 5 # Default +max-attributes = 7 # Default +max-locals = 15 # Default +min-public-methods = 2 # Default + +[tool.pylint.format] +max-line-length = 99 + +# https://microsoft.github.io/pyright/#/configuration?id=type-check-diagnostics-settings +[tool.pyright] +include = [ + "./src", + "./tests", +] +exclude = [ + "**/__pycache__", +] +typeCheckingMode = "strict" +deprecateTypingAliases = true +reportUnnecessaryTypeIgnoreComment = true +# "error" in "strict" mode +reportUnknownParameterType = false +reportUnknownArgumentType = false +reportUnknownLambdaType = false +reportUnknownVariableType = false +reportUnknownMemberType = false +reportMissingParameterType = false +reportMissingTypeArgument = false + +# https://docs.pytest.org/en/stable/reference/reference.html#configuration-options +[tool.pytest] +addopts = [ + "-vvr aP", + "--tb=short", + "--capture=sys", +] +strict = true +testpaths = ["./tests"] + +# https://docs.astral.sh/ruff/settings/ +[tool.ruff] +line-length = 79 +extend-exclude = [ + "**/generated", +] + +[tool.ruff.lint] +ignore = [ + "F403", # undefined-local-with-import-star + "F405", # undefined-local-with-import-star-usage +] # https://docs.astral.sh/ruff/rules/ + +# https://docs.astral.sh/uv/reference/settings/ +[tool.uv] +native-tls = true + +[tool.uv.pip] +emit-index-url = true +generate-hashes = true +link-mode = "copy" # https://github.com/astral-sh/uv/issues/7918 +no-binary = ["stack-exchange-backup"] +#no-build = true # prefer-binary: https://github.com/astral-sh/uv/issues/1794 +reinstall = true +#require-hashes = true # Doesn't work for editable installs: 
https://github.com/pypa/pip/issues/4995 +strict = true +universal = true +upgrade = true +verify-hashes = true diff --git a/release.ps1 b/release.ps1 new file mode 100755 index 0000000..e02e555 --- /dev/null +++ b/release.ps1 @@ -0,0 +1,44 @@ +#!/bin/sh +#Requires -Version 7 + +# This is a polyglot script that runs in both a POSIX compliant shell and PowerShell 7+. +# It assumes the following prerequisites have been met: +# 1. uv is installed via the standalone installer and in PATH +# 2. Node.js is installed and in PATH +# 3. The current working directory is set to the project root +# 4. The environment variable SC_GITHUB_TOKEN is set to a valid GitHub token +# 5. A compatible Python virtual environment is activated +# The shell commands are separated into blocks by the stages in the software testing life cycle: +# 1. development +# 1.1. dependency management +# 1.2. code generation +# 1.3. linting +# 2. system integration testing +# 3. user acceptance testing +# 4. production + +uv self update && +uv tool install --upgrade security-constraints && +npm install pyright@latest && +uv tool run security-constraints --min-severity moderate --output ./constraints.txt && +uv pip compile --constraints ./constraints.txt --output-file ./requirements.txt ./pyproject.toml && +uv pip sync ./requirements.txt && +uv pip install --editable . 
--constraints ./constraints.txt --constraints ./requirements.txt --group all && + +python -m openapi_spec_validator --subschema-errors all --validation-errors all ./resources/openapi/openapi.yaml && +python -m datamodel_code_generator && + +python -m ruff check && +python -m pylint ./src/ ./tests/ && +npm exec pyright && + +python -m pytest && + +python -m mprof run --include-children ./src/stackexchange/backup.py --account-id 8 && +python -m mprof peak && +python -m mprof clean && + +python -m bumpver update --patch --no-fetch && +python -m validate_pyproject ./pyproject.toml && + +$(exit) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..70152c4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,238 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --constraints ./constraints.txt --output-file ./requirements.txt ./pyproject.toml +--index-url https://pypi.org/simple + +attrs==25.4.0 \ + --hash=sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11 \ + --hash=sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373 + # via + # stack-exchange-backup (./pyproject.toml) + # cattrs +cattrs==26.1.0 \ + --hash=sha256:d1e0804c42639494d469d08d4f26d6b9de9b8ab26b446db7b5f8c2e97f7c3096 \ + --hash=sha256:fa239e0f0ec0715ba34852ce813986dfed1e12117e209b816ab87401271cdd40 + # via stack-exchange-backup (./pyproject.toml) +certifi==2026.2.25 \ + --hash=sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa \ + --hash=sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7 + # via + # -c ./constraints.txt + # requests +charset-normalizer==3.4.4 \ + --hash=sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad \ + --hash=sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93 \ + --hash=sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394 \ + 
--hash=sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89 \ + --hash=sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc \ + --hash=sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86 \ + --hash=sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63 \ + --hash=sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d \ + --hash=sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f \ + --hash=sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8 \ + --hash=sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0 \ + --hash=sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505 \ + --hash=sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161 \ + --hash=sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af \ + --hash=sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152 \ + --hash=sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318 \ + --hash=sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72 \ + --hash=sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4 \ + --hash=sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e \ + --hash=sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3 \ + --hash=sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576 \ + --hash=sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c \ + --hash=sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1 \ + --hash=sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8 \ + --hash=sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1 \ + --hash=sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2 \ + --hash=sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44 \ + 
--hash=sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26 \ + --hash=sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88 \ + --hash=sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016 \ + --hash=sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede \ + --hash=sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf \ + --hash=sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a \ + --hash=sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc \ + --hash=sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0 \ + --hash=sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84 \ + --hash=sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db \ + --hash=sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1 \ + --hash=sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7 \ + --hash=sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed \ + --hash=sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8 \ + --hash=sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133 \ + --hash=sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e \ + --hash=sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef \ + --hash=sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14 \ + --hash=sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2 \ + --hash=sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0 \ + --hash=sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d \ + --hash=sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828 \ + --hash=sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f \ + --hash=sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf \ + 
--hash=sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6 \ + --hash=sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328 \ + --hash=sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090 \ + --hash=sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa \ + --hash=sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381 \ + --hash=sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c \ + --hash=sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb \ + --hash=sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc \ + --hash=sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a \ + --hash=sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec \ + --hash=sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc \ + --hash=sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac \ + --hash=sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e \ + --hash=sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313 \ + --hash=sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569 \ + --hash=sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3 \ + --hash=sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d \ + --hash=sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525 \ + --hash=sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894 \ + --hash=sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3 \ + --hash=sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9 \ + --hash=sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a \ + --hash=sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9 \ + --hash=sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14 \ + 
--hash=sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25 \ + --hash=sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50 \ + --hash=sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf \ + --hash=sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1 \ + --hash=sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3 \ + --hash=sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac \ + --hash=sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e \ + --hash=sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815 \ + --hash=sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c \ + --hash=sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6 \ + --hash=sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6 \ + --hash=sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e \ + --hash=sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4 \ + --hash=sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84 \ + --hash=sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69 \ + --hash=sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15 \ + --hash=sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191 \ + --hash=sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0 \ + --hash=sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897 \ + --hash=sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd \ + --hash=sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2 \ + --hash=sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794 \ + --hash=sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d \ + --hash=sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074 \ + 
--hash=sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3 \ + --hash=sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224 \ + --hash=sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838 \ + --hash=sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a \ + --hash=sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d \ + --hash=sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d \ + --hash=sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f \ + --hash=sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8 \ + --hash=sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490 \ + --hash=sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966 \ + --hash=sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9 \ + --hash=sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3 \ + --hash=sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e \ + --hash=sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608 + # via requests +idna==3.11 \ + --hash=sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea \ + --hash=sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902 + # via + # -c ./constraints.txt + # requests +pyyaml==6.0.3 \ + --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \ + --hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \ + --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \ + --hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \ + --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \ + --hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \ + --hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \ + 
--hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \ + --hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \ + --hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \ + --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \ + --hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \ + --hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \ + --hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \ + --hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \ + --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \ + --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \ + --hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \ + --hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \ + --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \ + --hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \ + --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \ + --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \ + --hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \ + --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \ + --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \ + --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \ + --hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \ + --hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \ + --hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \ + --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \ + 
--hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \ + --hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \ + --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \ + --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \ + --hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \ + --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \ + --hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \ + --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \ + --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \ + --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \ + --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \ + --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \ + --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \ + --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \ + --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \ + --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \ + --hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \ + --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \ + --hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \ + --hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \ + --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \ + --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \ + --hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \ + --hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \ + 
--hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \ + --hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \ + --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \ + --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \ + --hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \ + --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \ + --hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \ + --hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \ + --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \ + --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \ + --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \ + --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \ + --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \ + --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \ + --hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \ + --hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \ + --hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \ + --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0 + # via + # -c ./constraints.txt + # cattrs +requests==2.32.5 \ + --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \ + --hash=sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf + # via + # -c ./constraints.txt + # stack-exchange-backup (./pyproject.toml) +ruamel-yaml==0.19.1 \ + --hash=sha256:27592957fedf6e0b62f281e96effd28043345e0e66001f97683aa9a40c667c93 \ + --hash=sha256:53eb66cd27849eff968ebf8f0bf61f46cdac2da1d1f3576dd4ccee9b25c31993 + # via 
stack-exchange-backup (./pyproject.toml) +typing-extensions==4.15.0 \ + --hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \ + --hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548 + # via cattrs +urllib3==2.6.3 \ + --hash=sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed \ + --hash=sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4 + # via + # -c ./constraints.txt + # requests diff --git a/resources/openapi/components/response_wrapper.yaml b/resources/openapi/components/response_wrapper.yaml new file mode 100644 index 0000000..270b6d5 --- /dev/null +++ b/resources/openapi/components/response_wrapper.yaml @@ -0,0 +1,32 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/wrapper' +type: object +properties: + backoff: + type: integer + error_id: + type: integer + description: refers to an Error + error_message: + type: string + error_name: + type: string + has_more: + type: boolean + items: + type: array + description: an array of the type found in type + items: + type: object + page: + type: integer + page_size: + type: integer + quota_max: + type: integer + quota_remaining: + type: integer + total: + type: integer + type: # discriminator + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/answer.yaml b/resources/openapi/components/schemas/answer.yaml new file mode 100644 index 0000000..c0ad31b --- /dev/null +++ b/resources/openapi/components/schemas/answer.yaml @@ -0,0 +1,103 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/answer' +description: |+ + This type represents an answer to a question on one of the Stack Exchange sites, such as this famous answer of bobince's. + + As on the question page, it is possible to fetch the comments on an answer as part of a call; though this is not done by default. 
+ + The upvoted, downvoted, and accepted fields can only be queried for with an access_token with the private_info scope. +type: object +properties: + accepted: + type: boolean + answer_id: + type: integer + description: refers to an Answer + awarded_bounty_amount: + type: integer + awarded_bounty_users: + type: array + items: + $ref: './shallow_user.yaml' + body: + type: string + description: unchanged in unsafe filters + body_markdown: + type: string + format: commonmark + can_comment: + type: boolean + can_edit: + type: boolean + can_flag: + type: boolean + can_suggest_edit: + type: boolean + collectives: + type: array + items: + $ref: './collective.yaml' + comment_count: + type: integer + comments: + type: array + items: + $ref: './comment.yaml' + community_owned_date: + type: integer + format: int64 + content_license: + type: string + creation_date: + type: integer + format: int64 + down_vote_count: + type: integer + downvoted: + type: boolean + description: private_info + is_accepted: + type: boolean + last_activity_date: + type: integer + format: int64 + last_edit_date: + type: integer + format: int64 + last_editor: + $ref: './shallow_user.yaml' + link: + type: string + description: unchanged in unsafe filters + locked_date: + type: integer + format: int64 + owner: + $ref: './shallow_user.yaml' + posted_by_collectives: + type: array + items: + $ref: './collective.yaml' + question_id: + type: integer + description: refers to a Question + recommendations: + type: array + items: + $ref: './collective_recommendation.yaml' + score: + type: integer + share_link: + type: string + description: unchanged in unsafe filters + tags: + type: array + items: + type: string + title: + type: string + up_vote_count: + type: integer + upvoted: + type: boolean + description: private_info \ No newline at end of file diff --git a/resources/openapi/components/schemas/badge_count.yaml b/resources/openapi/components/schemas/badge_count.yaml new file mode 100644 index 
0000000..21a8c11 --- /dev/null +++ b/resources/openapi/components/schemas/badge_count.yaml @@ -0,0 +1,12 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/badge-count' +description: |+ + This type represents the total Badges, segregated by rank, a user has earned. +type: object +properties: + bronze: + type: integer + gold: + type: integer + silver: + type: integer \ No newline at end of file diff --git a/resources/openapi/components/schemas/closed_details.yaml b/resources/openapi/components/schemas/closed_details.yaml new file mode 100644 index 0000000..bc0e231 --- /dev/null +++ b/resources/openapi/components/schemas/closed_details.yaml @@ -0,0 +1,22 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/closed-details' +description: |+ + This type represents details about a question closure. +type: object +properties: + by_users: + type: array + items: + $ref: './shallow_user.yaml' + description: + type: string + description: unchanged in unsafe filters + on_hold: + type: boolean + original_questions: + type: array + items: + $ref: './original_question.yaml' + reason: + type: string + description: unchanged in unsafe filters \ No newline at end of file diff --git a/resources/openapi/components/schemas/collective.yaml b/resources/openapi/components/schemas/collective.yaml new file mode 100644 index 0000000..78436b1 --- /dev/null +++ b/resources/openapi/components/schemas/collective.yaml @@ -0,0 +1,22 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/collective' +description: |+ + This type represents a collective on a Stack Exchange site. 
+type: object +properties: + description: + type: string + external_links: + type: array + items: + $ref: './collective_external_link.yaml' + link: + type: string + name: + type: string + slug: + type: string + tags: + type: array + items: + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/collective_external_link.yaml b/resources/openapi/components/schemas/collective_external_link.yaml new file mode 100644 index 0000000..e026778 --- /dev/null +++ b/resources/openapi/components/schemas/collective_external_link.yaml @@ -0,0 +1,18 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/collective-external-link' +description: |+ + Represents a collective's external link. +type: object +properties: + link: + type: string + type: + type: string + enum: + - website + - twitter + - github + - facebook + - instagram + - support + - linkedin \ No newline at end of file diff --git a/resources/openapi/components/schemas/collective_recommendation.yaml b/resources/openapi/components/schemas/collective_recommendation.yaml new file mode 100644 index 0000000..5d0caf3 --- /dev/null +++ b/resources/openapi/components/schemas/collective_recommendation.yaml @@ -0,0 +1,11 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/collective-recommendation' +description: |+ + Represents a collective's recommendation of an answer. +type: object +properties: + collective: + $ref: './collective.yaml' + creation_date: + type: integer + format: int64 \ No newline at end of file diff --git a/resources/openapi/components/schemas/comment.yaml b/resources/openapi/components/schemas/comment.yaml new file mode 100644 index 0000000..33c8b43 --- /dev/null +++ b/resources/openapi/components/schemas/comment.yaml @@ -0,0 +1,49 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/comment' +description: |+ + All Questions and Answers on a Stack Exchange site can be commented on, and this type represents those comments. 
+ + Comments can also be optionally directed at users, when this is the case the reply_to_user property is set (if it is requested in the current filter). + + The upvoted field can only be queried for with an access_token with the private_info scope. +type: object +properties: + body: + type: string + description: unchanged in unsafe filters + body_markdown: + type: string + format: commonmark + can_flag: + type: boolean + comment_id: + type: integer + description: refers to a Comment + content_license: + type: string + creation_date: + type: integer + format: int64 + edited: + type: boolean + link: + type: string + description: unchanged in unsafe filters + owner: + $ref: './shallow_user.yaml' + post_id: + type: integer + description: refers to a Post + post_type: + type: string + enum: + - question + - answer + - article + reply_to_user: + $ref: './shallow_user.yaml' + score: + type: integer + upvoted: + type: boolean + description: private_info \ No newline at end of file diff --git a/resources/openapi/components/schemas/error.yaml b/resources/openapi/components/schemas/error.yaml new file mode 100644 index 0000000..b3764e8 --- /dev/null +++ b/resources/openapi/components/schemas/error.yaml @@ -0,0 +1,17 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/error' +description: |+ + This type is used to describe the errors that can be returned by the API. + + It is not expected that many applications will concern themselves with this type. It is made available for development and testing purposes. + + Note that the field-name description for each record returned differs from the field-name error_message that is part of the common response wrapper on individual API requests. 
+type: object +properties: + description: + type: string + error_id: + type: integer + description: refers to an Error + error_name: + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/filter.yaml b/resources/openapi/components/schemas/filter.yaml new file mode 100644 index 0000000..e05195e --- /dev/null +++ b/resources/openapi/components/schemas/filter.yaml @@ -0,0 +1,20 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/filter' +description: |+ + This type describes a filter on the API. + + When passing a filter to methods in the API, it should be referred to by name alone. +type: object +properties: + filter: + type: string + filter_type: + type: string + enum: + - safe + - unsafe + - invalid + included_fields: + type: array + items: + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/migration_info.yaml b/resources/openapi/components/schemas/migration_info.yaml new file mode 100644 index 0000000..236c974 --- /dev/null +++ b/resources/openapi/components/schemas/migration_info.yaml @@ -0,0 +1,14 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/migration-info' +description: |+ + This type represents a question's migration to or from a different site in the Stack Exchange network. 
+type: object +properties: + on_date: + type: integer + format: int64 + other_site: + $ref: './site.yaml' + question_id: + type: integer + description: refers to a Question \ No newline at end of file diff --git a/resources/openapi/components/schemas/network_post.yaml b/resources/openapi/components/schemas/network_post.yaml new file mode 100644 index 0000000..1285e62 --- /dev/null +++ b/resources/openapi/components/schemas/network_post.yaml @@ -0,0 +1,19 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/network-post' +description: |+ + This type represents a post on the network, however it is greatly reduced when compared to the full Post type to reduce the amount of work that needs to be done to fetch it from multiple sites in the network. +type: object +properties: + post_id: + type: integer + description: refers to a Post + post_type: + type: string + enum: + - question + - answer + - article + score: + type: integer + title: + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/network_user.yaml b/resources/openapi/components/schemas/network_user.yaml new file mode 100644 index 0000000..a4c9c49 --- /dev/null +++ b/resources/openapi/components/schemas/network_user.yaml @@ -0,0 +1,46 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/network-user' +description: |+ + This type represents a user, however it is greatly reduced when compared to the full User type to reduce the amount of work that needs to be done to fetch it from multiple sites in the network. 
+type: object +properties: + account_id: + type: integer + answer_count: + type: integer + badge_counts: + $ref: './badge_count.yaml' + creation_date: + type: integer + format: int64 + last_access_date: + type: integer + format: int64 + question_count: + type: integer + reputation: + type: integer + site_name: + type: string + site_url: + type: string + format: uri + top_answers: + type: array + items: + $ref: './network_post.yaml' + top_questions: + type: array + items: + $ref: './network_post.yaml' + user_id: + type: integer + description: refers to a User + user_type: + type: string + enum: + - unregistered + - registered + - moderator + - team_admin + - does_not_exist \ No newline at end of file diff --git a/resources/openapi/components/schemas/notice.yaml b/resources/openapi/components/schemas/notice.yaml new file mode 100644 index 0000000..e207072 --- /dev/null +++ b/resources/openapi/components/schemas/notice.yaml @@ -0,0 +1,14 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/notice' +description: |+ + Represents a notice on a post. +type: object +properties: + body: + type: string + description: unchanged in unsafe filters + creation_date: + type: integer + format: int64 + owner_user_id: + type: integer \ No newline at end of file diff --git a/resources/openapi/components/schemas/original_question.yaml b/resources/openapi/components/schemas/original_question.yaml new file mode 100644 index 0000000..459e5e3 --- /dev/null +++ b/resources/openapi/components/schemas/original_question.yaml @@ -0,0 +1,17 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/original-question' +description: |+ + This type represents an "original" question that another was closed as a duplicate of. + + This type is mostly analogous to a row in the "gray box" that appears in questions closed as dupes which lists original questions. 
+type: object +properties: + accepted_answer_id: + type: integer + answer_count: + type: integer + question_id: + type: integer + description: refers to a Question + title: + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/question.yaml b/resources/openapi/components/schemas/question.yaml new file mode 100644 index 0000000..003ed0c --- /dev/null +++ b/resources/openapi/components/schemas/question.yaml @@ -0,0 +1,136 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/question' +description: |+ + This type represents a question on one of the Stack Exchange sites, such as this famous RegEx question. + + This type is heavily inspired by the question page itself, and can optionally return comments and answers accordingly. + + The upvoted, downvoted, and favorited fields can only be queried for with an access_token with the private_info scope. +type: object +properties: + accepted_answer_id: + type: integer + answer_count: + type: integer + answers: + type: array + items: + $ref: './answer.yaml' + body: + type: string + description: unchanged in unsafe filters + body_markdown: + type: string + format: commonmark + bounty_amount: + type: integer + bounty_closes_date: + type: integer + format: int64 + bounty_user: + $ref: './shallow_user.yaml' + can_answer: + type: boolean + can_close: + type: boolean + can_comment: + type: boolean + can_edit: + type: boolean + can_flag: + type: boolean + can_suggest_edit: + type: boolean + close_vote_count: + type: integer + closed_date: + type: integer + format: int64 + closed_details: + $ref: './closed_details.yaml' + closed_reason: + type: string + collectives: + type: array + items: + $ref: './collective.yaml' + comment_count: + type: integer + comments: + type: array + items: + $ref: './comment.yaml' + community_owned_date: + type: integer + format: int64 + content_license: + type: string + creation_date: + type: integer + format: int64 + delete_vote_count: + type: integer + 
down_vote_count: + type: integer + downvoted: + type: boolean + description: private_info + favorite_count: + type: integer + favorited: + type: boolean + description: private_info + is_answered: + type: boolean + last_activity_date: + type: integer + format: int64 + last_edit_date: + type: integer + format: int64 + last_editor: + $ref: './shallow_user.yaml' + link: + type: string + description: unchanged in unsafe filters + locked_date: + type: integer + format: int64 + migrated_from: + $ref: './migration_info.yaml' + migrated_to: + $ref: './migration_info.yaml' + notice: + $ref: './notice.yaml' + owner: + $ref: './shallow_user.yaml' + posted_by_collectives: + type: array + items: + $ref: './collective.yaml' + protected_date: + type: integer + format: int64 + question_id: + type: integer + description: refers to a Question + reopen_vote_count: + type: integer + score: + type: integer + share_link: + type: string + description: unchanged in unsafe filters + tags: + type: array + items: + type: string + title: + type: string + up_vote_count: + type: integer + upvoted: + type: boolean + description: private_info + view_count: + type: integer \ No newline at end of file diff --git a/resources/openapi/components/schemas/related_site.yaml b/resources/openapi/components/schemas/related_site.yaml new file mode 100644 index 0000000..d433752 --- /dev/null +++ b/resources/openapi/components/schemas/related_site.yaml @@ -0,0 +1,21 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/related-site' +description: |+ + This type represents a site that is related in some way to another site. + + Examples include chat and meta, and parent sites. + + Applications should be able to gracefully handle the addition of new related site types. +type: object +properties: + api_site_parameter: + type: string + name: + type: string + relation: + type: string + description: one of parent, meta, or chat, but new options may be added. 
+ site_url: + type: string + format: uri + description: unchanged in unsafe filters \ No newline at end of file diff --git a/resources/openapi/components/schemas/shallow_user.yaml b/resources/openapi/components/schemas/shallow_user.yaml new file mode 100644 index 0000000..255c2e7 --- /dev/null +++ b/resources/openapi/components/schemas/shallow_user.yaml @@ -0,0 +1,35 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/shallow-user' +description: |+ + This type represents a user, but omits many of the fields found on the full User type. + + This type is mostly analogous to the "user card" found on many pages (like the question page) on a Stack Exchange site. +type: object +properties: + accept_rate: + type: integer + account_id: + type: integer + badge_counts: + $ref: './badge_count.yaml' + display_name: + type: string + link: + type: string + description: unchanged in unsafe filters + profile_image: + type: string + description: unchanged in unsafe filters + reputation: + type: integer + user_id: + type: integer + description: refers to a User + user_type: + type: string + enum: + - unregistered + - registered + - moderator + - team_admin + - does_not_exist \ No newline at end of file diff --git a/resources/openapi/components/schemas/site.yaml b/resources/openapi/components/schemas/site.yaml new file mode 100644 index 0000000..acecd42 --- /dev/null +++ b/resources/openapi/components/schemas/site.yaml @@ -0,0 +1,63 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/site' +description: |+ + This type represents a site in the Stack Exchange network. 
+type: object +properties: + aliases: + type: array + items: + type: string + api_site_parameter: + type: string + audience: + type: string + closed_beta_date: + type: integer + format: int64 + favicon_url: + type: string + format: uri + high_resolution_icon_url: + type: string + format: uri + icon_url: + type: string + format: uri + launch_date: + type: integer + format: int64 + logo_url: + type: string + format: uri + markdown_extensions: + type: array + description: an array of 'MathJax', 'Prettify', 'Balsamiq' or 'jTab' strings, but new options may be added. + items: + type: string + name: + type: string + open_beta_date: + type: integer + format: int64 + related_sites: + type: array + items: + $ref: './related_site.yaml' + site_state: + type: string + enum: + - normal + - closed_beta + - open_beta + - linked_meta + site_type: + type: string + description: one of main_site or meta_site, but new options may be added. + site_url: + type: string + format: uri + styling: + $ref: './styling.yaml' + twitter_account: + type: string \ No newline at end of file diff --git a/resources/openapi/components/schemas/styling.yaml b/resources/openapi/components/schemas/styling.yaml new file mode 100644 index 0000000..4fcf980 --- /dev/null +++ b/resources/openapi/components/schemas/styling.yaml @@ -0,0 +1,18 @@ +externalDocs: + url: 'https://api.stackexchange.com/docs/types/styling' +description: |+ + This type represents some stylings of a site in the Stack Exchange network. + + These stylings are meant to allow developers to subtly vary the presentation of resources in their applications so as to indicate to users the original source site. + + Applications should be able to gracefully handle these styles changes, though they can safely assume that these style changes are infrequent. + + Note that colors can be returned either as six or three digit hex triplets. 
+type: object +properties: + link_color: + type: string + tag_background_color: + type: string + tag_foreground_color: + type: string \ No newline at end of file diff --git a/resources/openapi/openapi.yaml b/resources/openapi/openapi.yaml new file mode 100644 index 0000000..8e6d419 --- /dev/null +++ b/resources/openapi/openapi.yaml @@ -0,0 +1,358 @@ +openapi: 3.1.0 + +info: + title: Stack Exchange API + termsOfService: 'https://stackoverflow.com/legal/api-terms-of-use' + version: '2.3' + +servers: + - url: 'https://api.stackexchange.com/2.3' + +components: + schemas: + # Top Level Types + Answer: + $ref: './components/schemas/answer.yaml' + Collective: + $ref: './components/schemas/collective.yaml' + Comment: + $ref: './components/schemas/comment.yaml' + Error: + $ref: './components/schemas/error.yaml' + Filter: + $ref: './components/schemas/filter.yaml' + NetworkUser: + $ref: './components/schemas/network_user.yaml' + Question: + $ref: './components/schemas/question.yaml' + Site: + $ref: './components/schemas/site.yaml' + # Member Types + BadgeCount: + $ref: './components/schemas/badge_count.yaml' + ClosedDetails: + $ref: './components/schemas/closed_details.yaml' + CollectiveExternalLink: + $ref: './components/schemas/collective_external_link.yaml' + CollectiveRecommendation: + $ref: './components/schemas/collective_recommendation.yaml' + MigrationInfo: + $ref: './components/schemas/migration_info.yaml' + NetworkPost: + $ref: './components/schemas/network_post.yaml' + Notice: + $ref: './components/schemas/notice.yaml' + OriginalQuestion: + $ref: './components/schemas/original_question.yaml' + RelatedSite: + $ref: './components/schemas/related_site.yaml' + ShallowUser: + $ref: './components/schemas/shallow_user.yaml' + Styling: + $ref: './components/schemas/styling.yaml' + # Response Type Template + ResponseWrapper: + $ref: './components/response_wrapper.yaml' + parameters: + Filter: + name: filter + in: query + schema: + externalDocs: + url: 
'https://api.stackexchange.com/docs/filters' + type: string + description: |+ + Filters allow applications to reduce API responses to just the fields they are concerned with, + saving bandwidth. + Paging: + name: paging + in: query + schema: + externalDocs: + url: 'https://api.stackexchange.com/docs/paging' + type: object + properties: + page: + type: integer + format: int32 + description: Minimum and maximum values determined from testing. + minimum: 1 + maximum: 2147483647 + default: 1 + pagesize: + type: integer + format: int32 + description: |+ + The effective minimum and maximum are, respectively, -2^31 and 100, + with the sole exception of the /sites route for which the effective maximum is 2^31 - 2, + beyond which a bad_parameter error is returned. + Negative integers are treated as 0. + minimum: 0 + maximum: 100 + default: 30 + Complex: + name: complex + in: query + schema: + externalDocs: + url: 'https://api.stackexchange.com/docs/min-max' + type: object + properties: + sort: + oneOf: + - type: string + - type: string + description: |+ + activity - last_activity_date + creation - creation_date + votes - score + enum: + - activity + - creation + - votes + default: activity + min: + type: + - integer + max: + type: + - integer + fromdate: + type: integer + format: int64 + description: Defines the lower bound of creation_date. + todate: + type: integer + format: int64 + description: Defines the upper bound of creation_date. + order: + type: string + description: Omitted in the documentation, but is always found to accompany other properties in the group. + enum: + - asc + - desc + Ids: + name: ids + in: path + required: true + schema: + externalDocs: + url: 'https://api.stackexchange.com/docs/vectors' + type: array + description: |+ + Most methods that take ids in the API will take up to 100 of them in a single go. + When passing a vector, separate each id with a semicolon. 
+ Vectors are not restricted to integer values, some are lists of strings ({tags}) or guids ({ids}). + maxItems: 100 + items: + type: + - integer + example: 1;2;3 + Site: + name: site + in: query + schema: + externalDocs: + url: 'https://api.stackexchange.com/docs?tab=category#docs' + type: string + description: |+ + Per-site methods operate on a single site at a time, identified by the site parameter. + This parameter can be the full domain name (i.e. "stackoverflow.com"), + or a short form identified by api_site_parameter on the Site object. + responses: + default: + description: Default + content: + application/json: + schema: + $ref: '#/components/schemas/ResponseWrapper' + securitySchemes: + ApiKeyAuth: + type: http + description: > + More information can be found at https://api.stackexchange.com/docs/authentication. + scheme: bearer + +paths: + /questions/{ids}: + get: + tags: + - Per-Site Methods + - Questions + description: Returns the questions identified in {ids}. + externalDocs: + url: 'https://api.stackexchange.com/docs/questions-by-ids' + operationId: questions-by-ids + parameters: + - $ref: '#/components/parameters/Ids' + - $ref: '#/components/parameters/Site' + - $ref: '#/components/parameters/Filter' + - $ref: '#/components/parameters/Paging' + - $ref: '#/components/parameters/Complex' + /users/{ids}/answers: + get: + tags: + - Per-Site Methods + - Users + description: Returns the answers the users in {ids} have posted. + externalDocs: + url: 'https://api.stackexchange.com/docs/answers-on-users' + operationId: answers-on-users + parameters: + - $ref: '#/components/parameters/Ids' + - $ref: '#/components/parameters/Site' + - $ref: '#/components/parameters/Filter' + - $ref: '#/components/parameters/Paging' + - $ref: '#/components/parameters/Complex' + /users/{ids}/questions: + get: + tags: + - Per-Site Methods + - Users + description: Gets the questions asked by the users in {ids}. 
+ externalDocs: + url: 'https://api.stackexchange.com/docs/questions-on-users' + operationId: questions-on-users + parameters: + - $ref: '#/components/parameters/Ids' + - $ref: '#/components/parameters/Site' + - $ref: '#/components/parameters/Filter' + - $ref: '#/components/parameters/Paging' + - $ref: '#/components/parameters/Complex' + /errors/{id}: + get: + tags: + - Network Methods + - Errors + description: This method allows you to generate an error. + externalDocs: + url: 'https://api.stackexchange.com/docs/simulate-error' + operationId: simulate-error + parameters: + - name: id + in: path + required: true + schema: + type: integer + - $ref: '#/components/parameters/Filter' + /filters/create: + get: + tags: + - Network Methods + - Filters + description: Creates a new filter given a list of includes, excludes, a base filter, and whether or not this filter should be "unsafe". + externalDocs: + url: 'https://api.stackexchange.com/docs/create-filter' + operationId: create-filter + parameters: + - $ref: '#/components/parameters/Filter' + - name: include + in: query + schema: + type: array + items: + type: string + example: .page;.pagesize + - name: exclude + in: query + schema: + type: array + items: + type: string + example: .quota_max;.quota_remaining + - name: base + in: query + schema: + type: string + - name: unsafe + in: query + schema: + type: boolean + default: false + /filters/{filters}: + get: + tags: + - Network Methods + - Filters + description: Returns the fields included by the given filters, and the "safeness" of those filters. 
+ externalDocs: + url: 'https://api.stackexchange.com/docs/read-filter' + operationId: read-filter + parameters: + - name: filters + in: path + required: true + schema: + type: array + maxItems: 20 + items: + type: string + example: default;withbody + - $ref: '#/components/parameters/Filter' + - $ref: '#/components/parameters/Paging' + /sites: + get: + tags: + - Network Methods + - Sites + description: Returns all sites in the network. + externalDocs: + url: 'https://api.stackexchange.com/docs/sites' + operationId: sites + parameters: + - $ref: '#/components/parameters/Filter' + - $ref: '#/components/parameters/Paging' + /users/{ids}/associated: + get: + tags: + - Network Methods + - Users + description: Returns all of a user's associated accounts, given their account_ids in {ids}. + externalDocs: + url: 'https://api.stackexchange.com/docs/associated-users' + operationId: associated-users + parameters: + - $ref: '#/components/parameters/Ids' + - $ref: '#/components/parameters/Filter' + - $ref: '#/components/parameters/Paging' + - name: types + in: query + schema: + type: array + items: + type: string + enum: + - main_site + - meta_site + description: Specify, semicolon delimited, main_site or meta_site to filter by site. 
+ +security: + - ApiKeyAuth: [] + +tags: + - name: Per-Site Methods + - name: Answers + - name: Badges + - name: Collectives + - name: Comments + - name: Events + - name: Info + - name: Posts + - name: Privileges + - name: Questions + - name: Revisions + - name: Search + - name: Suggested Edits + - name: Tags + - name: Users + - name: Network Methods + - name: Access Tokens + - name: Achievements + - name: Applications + - name: Errors + - name: Filters + - name: Inbox + - name: Notifications + - name: Sites + - name: Top Level Types + - name: Member Types diff --git a/src/stackexchange/api.py b/src/stackexchange/api.py new file mode 100644 index 0000000..14080b2 --- /dev/null +++ b/src/stackexchange/api.py @@ -0,0 +1,733 @@ +# https://github.com/microsoft/pyright/issues/1575#issuecomment-1304571290 +# pyright: reportCallIssue=information +# https://github.com/microsoft/pyright/issues/9149 +# pyright: reportArgumentType=information +# Should show "23 informations" +import datetime +import functools +import inspect +import threading +import time +from collections import deque +from collections.abc import Callable, Generator +from http import HTTPMethod +from typing import ( + Any, + ClassVar, + Literal, + overload, +) + +import attrs +import requests +import requests.adapters +import requests.auth +import urllib3 + +from stackexchange.model import * +# noinspection PyProtectedMember +from stackexchange.serdes import ( + query_converter, + unstructure_as_batched_vectors, +) + +__version__ = "2.3" +__all__ = ["StackExchangeApi"] + + +# https://stackoverflow.com/q/6760685 +# https://gist.github.com/wowkin2/3af15bfbf197a14a2b0b2488a1e8c787 +class SingletonMeta(type): + _instances = {} + _init = {} + + # noinspection PyUnusedLocal + def __init__(cls, clsname, bases, dct, **kwds): + super().__init__(type) + cls._init[cls] = dct.get("__init__", None) + + def __call__(cls, *args, **kwargs): + if (init := cls._init[cls]) is not None: + bound = 
inspect.signature(init).bind(None, *args, **kwargs) + bound.apply_defaults() + callargs = bound.arguments.items() + key = (cls, frozenset(callargs)) + else: + key = cls + if key not in cls._instances: + cls._instances[key] = (super(SingletonMeta, cls) + .__call__(*args, **kwargs)) + return cls._instances[key] + + def __new__(mcs, clsname, bases, dct, **kwds): + return (super(SingletonMeta, mcs) + .__new__(mcs, clsname, bases, dct, **kwds)) + + +@attrs.define(frozen=True, kw_only=True) +class PathParamsInfo[T]: + vector_key: str | None + path_params: dict[str, Any] + + +def api_method(func): + def wrapper(*args, **kwargs): + api_method_name = func.__name__ + return func(*args, **kwargs, initiator=api_method_name) + + return wrapper + + +# pylint: disable=too-few-public-methods +class BearerAuth(requests.auth.AuthBase): + def __init__(self, token): + self.token = token + + def __call__(self, r): + if self.token: + r.headers["Authorization"] = "Bearer " + self.token + return r + + +# pylint: disable=too-many-instance-attributes +# noinspection PyTypeChecker +class StackExchangeApi(metaclass=SingletonMeta): + API_ROOT: ClassVar[str] = f"https://api.stackexchange.com/{__version__}" + API_KEY: ClassVar[str] = "YLTVFmHkeJbm7ZIOoXstag((" + MAX_REQUESTS_PER_DAY: ClassVar[int] = 10_000 + MAX_REQUESTS_PER_SECOND: ClassVar[int] = 30 + """If a single IP is making more than 30 requests a second, new + requests will be dropped. + """ + MAX_CONCURRENT_REQUESTS: ClassVar[int] = 1 + """Just being conservative here, as the exact rate limit mechanisms + are not well-understood. + """ + + def __init__(self, + api_key: str | None = API_KEY, + access_token: str | None = None, + limit_rate: int = MAX_REQUESTS_PER_SECOND) -> None: + self.api_key: str | None = api_key + """API keys, also known as request keys or app keys, + grant more requests per day (10,000 vs 300 for anonymous API + access) and allow querying results past page 25. 
def _request_hook(self, request: Callable[..., requests.models.Response]):
    """Build a factory of throttled request callables around *request*.

    The returned factory takes the initiating API method name and the
    query parameters, and yields a callable with the calling convention
    of ``requests.request``.  Each call first honours the daily quota,
    the per-method backoff and the rate limit, then fills in the request
    keyword arguments before delegating to *request*.

    :param request: the underlying callable that performs the HTTP call
    :return: ``wrapped_request(initiator, params)`` factory
    """

    def wrapped_request(initiator: str, params: Parameters):
        def api_request(method: str, url: str, **kwargs) \
                -> requests.models.Response:
            # Throttling checks run in this fixed order before every call.
            self._respect_quota_remaining()
            self._respect_backoff(initiator)
            self._respect_rate_limit()
            # Mutates kwargs in place (auth, timeout, params/data).
            self._assign_request_parameters(params, method, kwargs)
            return request(method, url, **kwargs)

        return functools.wraps(requests.request)(api_request)

    return wrapped_request
def _respect_backoff(self, initiator: str) -> None:
    """Sleep until the backoff recorded for *initiator* has elapsed.

    The recorded entry is removed from ``self._backoff`` whether or not
    any waiting turns out to be necessary.

    :param initiator: name of the API method about to be called
    """
    lift_at = self._backoff.pop(initiator, None)
    if not lift_at:
        return
    current = datetime.datetime.now(datetime.UTC).timestamp()
    # Add 1 more second just to be safe
    delay = int(lift_at - current) + 1
    if delay <= 0:
        return
    notice = (
        "We've made too many requests to the Stack Exchange API, "
        + f"so we will need to wait for {delay} seconds. "
        + "Please be patient..."
    )
    print(notice, flush=True)
    time.sleep(delay)
@classmethod
def path_params_info(cls, params: Parameters) -> PathParamsInfo:
    """Collect the non-``None`` path parameters declared on *params*.

    A field counts as a path parameter when its attrs metadata contains
    ``Parameters.PATH_PARAMETER_KEY``.  If any collected value is a list,
    the last such field becomes the vector key and its value is replaced
    by the result of ``unstructure_as_batched_vectors``.

    :param params: the request parameters object to inspect
    :return: the vector key (or ``None``) and the path parameter mapping
    """
    # noinspection PyDataclass
    declared = attrs.fields_dict(type(params))
    collected = {}
    list_key = None
    for name, attribute in declared.items():
        if Parameters.PATH_PARAMETER_KEY not in attribute.metadata:
            continue
        value = getattr(params, name)
        if value is None:
            continue
        collected[name] = value
        if isinstance(value, list):
            list_key = name
    if list_key:
        collected[list_key] = unstructure_as_batched_vectors(
            collected[list_key],
            declared[list_key],
        )
    return PathParamsInfo(vector_key=list_key, path_params=collected)
# pylint: disable=too-many-arguments, too-many-locals
def _call_api[T](self,
                 /,
                 url_template: str,
                 params: Parameters,
                 model: type[T],
                 *,
                 initiator="unknown",
                 http_method: HTTPMethod = HTTPMethod.GET,
                 auto_pagination=True,
                 items_only=True,
                 **kwargs) \
        -> Generator[Response[T] | T, None, None]:
    """Lazily call one API route and yield its results.

    :param url_template: URL with ``str.format`` placeholders that are
        filled from the path parameters found on *params*
    :param params: request parameters; when *auto_pagination* is true its
        ``paging`` attribute is overwritten on every request
    :param model: item type used to structure each response
    :param initiator: name of the calling API method, used for backoff
        bookkeeping
    :param http_method: HTTP method passed to the request callable
    :param auto_pagination: request successive pages until exhausted;
        when false exactly one request is made per vector
    :param items_only: yield the individual items instead of the whole
        response wrapper
    :param kwargs: forwarded to the underlying request callable
    :return: generator of items or of response wrappers
    """
    info = self.path_params_info(params)
    # Without a vector path parameter, a one-element placeholder list
    # makes the loop body run exactly once.
    batched_vectors = (info.path_params[info.vector_key]
                       if info.vector_key
                       else [0])
    for vector in batched_vectors:
        if info.vector_key:
            # Substitute the current vector into the path parameters.
            info.path_params[info.vector_key] = vector
        url = url_template.format(**info.path_params)
        page = 0
        has_more = True
        while has_more:
            page += 1
            if auto_pagination:
                # Mutates the caller's params object in place.
                setattr(
                    params,
                    "paging",
                    Paging(page=page, pagesize=Parameters.MAX_PAGE_SIZE)
                )
            request = self.proxied_request(initiator, params)
            response = request(http_method, url, **kwargs)
            structured_response \
                = self._process_response(response, model, initiator)
            # NOTE(review): paging continues while the page had items even
            # if has_more is false, so the loop only stops on an empty
            # page -- confirm this extra request is intended.
            has_more = ((structured_response.has_more
                         or structured_response.items)
                        and auto_pagination)
            if items_only:
                yield from structured_response.items or []
            else:
                yield structured_response
@overload
@api_method
def answers_on_users(self,
                     /,
                     params: AnswersOnUsersParameters,
                     *,
                     auto_pagination: bool = ...,
                     items_only: Literal[False],
                     **kwargs) \
        -> Generator[Response[Answer], None, None]:
    ...

@overload
@api_method
def answers_on_users(self,
                     /,
                     params: AnswersOnUsersParameters,
                     *,
                     auto_pagination: bool = ...,
                     items_only: Literal[True] = ...,
                     **kwargs) \
        -> Generator[Answer, None, None]:
    ...

@api_method
def answers_on_users(self,
                     /,
                     params: AnswersOnUsersParameters,
                     *,
                     auto_pagination=True,
                     items_only=True,
                     **kwargs) \
        -> Generator[Response[Answer] | Answer, None, None]:
    """Return the answers the users in {ids} have posted.

    :param params: query and path parameters for the route
    :param auto_pagination: keep requesting pages until exhausted
    :param items_only: yield answers rather than response wrappers
    :return: generator over answers or response wrappers
    """
    endpoint = f"{StackExchangeApi.API_ROOT}/users/{{ids}}/answers"
    results = self._call_api(
        endpoint,
        params,
        Answer,
        auto_pagination=auto_pagination,
        items_only=items_only,
        **kwargs,
    )
    return results
@api_method
def simulate_error(self, /, params: SimulateErrorParameters, **kwargs) \
        -> Response[Any]:
    """Ask the API to generate an error.

    A single, non-paginated request is made and the first response
    wrapper produced for it is returned.

    :param params: parameters identifying the error to simulate
    :return: the response wrapper of that request
    """
    endpoint = f"{StackExchangeApi.API_ROOT}/errors/{{id}}"
    responses = self._call_api(
        endpoint,
        params,
        Error,
        auto_pagination=False,
        items_only=False,
        **kwargs,
    )
    return next(responses)
+ + :param params: + :param http_method: + :param items_only: + :return: + """ + return next( + self._call_api( + StackExchangeApi.API_ROOT + "/filters/create", + params, + Filter, + http_method=http_method, + auto_pagination=False, + items_only=items_only, + **kwargs, + ) + ) + + @overload + @api_method + def read_filter(self, + /, + params: ReadFilterParameters, + *, + items_only: Literal[False], + **kwargs) \ + -> Generator[Response[Filter], None, None]: + ... + + @overload + @api_method + def read_filter(self, + /, + params: ReadFilterParameters, + *, + items_only: Literal[True] = ..., + **kwargs) \ + -> Generator[Filter, None, None]: + ... + + @api_method + def read_filter(self, + /, + params: ReadFilterParameters, + *, + items_only=True, + **kwargs) \ + -> Generator[Response[Filter] | Filter, None, None]: + """`Documentation `_ + Returns the fields included by the given filters, + and the "safeness" of those filters. + + :param params: + :param items_only: + :return: + """ + return self._call_api( + StackExchangeApi.API_ROOT + "/filters/{filters}", + params, + Filter, + auto_pagination=True, + items_only=items_only, + **kwargs, + ) + + @overload + @api_method + def sites(self, + /, + params: SitesParameters, + *, + auto_pagination: bool = ..., + items_only: Literal[False], + **kwargs) \ + -> Generator[Response[Site], None, None]: + ... + + @overload + @api_method + def sites(self, + /, + params: SitesParameters, + *, + auto_pagination: bool = ..., + items_only: Literal[True] = ..., + **kwargs) \ + -> Generator[Site, None, None]: + ... + + @api_method + def sites(self, + /, + params: SitesParameters, + *, + auto_pagination=True, + items_only=True, + **kwargs) \ + -> Generator[Response[Site] | Site, None, None]: + """`Documentation `_ + Returns all sites in the network. 
+ + :param params: + :param auto_pagination: + :param items_only: + :return: + """ + return self._call_api( + StackExchangeApi.API_ROOT + "/sites", + params, + Site, + auto_pagination=auto_pagination, + items_only=items_only, + **kwargs, + ) + + @overload + @api_method + def associated_users(self, + /, + params: AssociatedUsersParameters, + *, + auto_pagination: bool = ..., + items_only: Literal[False], + **kwargs) \ + -> Generator[Response[NetworkUser], None, None]: + ... + + @overload + @api_method + def associated_users(self, + /, + params: AssociatedUsersParameters, + *, + auto_pagination: bool = ..., + items_only: Literal[True] = ..., + **kwargs) \ + -> Generator[NetworkUser, None, None]: + ... + + @api_method + def associated_users(self, + /, + params: AssociatedUsersParameters, + *, + auto_pagination=True, + items_only=True, + **kwargs) \ + -> Generator[Response[NetworkUser] | NetworkUser, None, None]: + """`Documentation `_ + Returns all of a user's associated accounts, + given their account_ids in {ids}. + It is a `known bug `_ that + results are not returned for meta sites. 
def main() -> None:
    """Run the command-line backup.

    Parses the arguments, prepares ``<out-dir>/stack_user_<account-id>``
    (optionally removing its non-hidden subdirectories when ``--clean`` is
    given), rebinds the module-level ``api`` with the requested key and
    rate limit, then writes the questions and answers of every associated
    site.
    """
    args = parse_arguments()
    backup_root = Path(args.out_dir, f"stack_user_{args.account_id}").resolve()
    # parents=True: --out-dir may name a directory that does not exist yet;
    # create_output_file() creates missing parents the same way.
    backup_root.mkdir(parents=True, exist_ok=True)
    if args.clean:
        # Collect first so the scandir handle is closed deterministically
        # before anything is deleted.
        with scandir(backup_root) as entries:
            stale_dirs = [entry.path
                          for entry in entries
                          if entry.is_dir() and not entry.name.startswith(".")]
        for stale_dir in stale_dirs:
            rmtree(stale_dir)
    global api  # pylint: disable=global-statement
    api = StackExchangeApi(api_key=args.api_key, limit_rate=args.limit_rate)
    network_users = get_network_users(args.account_id, args.no_meta)
    print(f"Found {len(network_users)} Stack Exchange sites associated with "
          + f"https://stackexchange.com/users/{args.account_id}/")
    for i, network_user in enumerate(network_users, start=1):
        print("Downloading and writing questions from site "
              + f"{i}/{len(network_users)} "
              + f"({network_user.site_domain_name})...",
              end="",
              flush=True)
        backup_user_questions(network_user, backup_root, args.format)
        print("Done.")
        print("Downloading and writing answers from site "
              + f"{i}/{len(network_users)} "
              + f"({network_user.site_domain_name})...",
              end="",
              flush=True)
        backup_user_answers(network_user, backup_root, args.format)
        print("Done.")
def get_network_users(account_id: int, no_meta: bool = False) \
        -> set[NetworkUserInfo]:
    """Return the per-site users associated with a network account.

    Associated users lacking a user ID, a site URL or a parsable host are
    ignored.  With *no_meta* set, only main sites are requested and the
    hosts ``meta.stackexchange.com`` and ``stackapps.com`` are dropped;
    otherwise the result is completed by
    :func:`acquire_missing_network_users`.

    :param account_id: account ID on stackexchange.com
    :param no_meta: leave out meta sites
    :return: one entry per site the account participates in
    """
    site_types = cast(list[Literal["main_site", "meta_site"]],
                      ["main_site"] if no_meta
                      else ["main_site", "meta_site"])
    query = AssociatedUsersParameters(
        ids=[account_id],
        filter="!2SUoF4c)sOul00Zq",
        types=site_types,
    )
    skipped_hosts = {"meta.stackexchange.com", "stackapps.com"}
    network_users: set[NetworkUserInfo] = set()
    for associated_user in api.associated_users(query):
        if not (associated_user.user_id and associated_user.site_url):
            continue
        site_host = parse_url(associated_user.site_url).host
        if not site_host:
            continue
        if no_meta and site_host in skipped_hosts:
            continue
        network_users.add(
            NetworkUserInfo(
                site_domain_name=site_host,
                user_id=associated_user.user_id,
            )
        )
    if not no_meta:
        acquire_missing_network_users(network_users)
    return network_users
def backup_user_questions(network_user: NetworkUserInfo,
                          backup_root: str | PathLike[str],
                          output_format: OutputFormat = "markdown") -> None:
    """Write every question asked by *network_user*, with its answers.

    Each question ID is also recorded in
    ``network_user.user_question_ids``.

    :param network_user: the site and user whose questions are fetched
    :param backup_root: root directory of the backup
    :param output_format: output file format
    """
    question_filter = "r8cwHZB3p97RraWJSdBqs7HCWXUCebDx9Wuhn_ChbmNDTEZ3_1lkd3suiMKEh6U-zwe.EML1(4mmULGTB"

    def save(post: Question | Answer) -> None:
        # All posts handled here are filed under the "q" contribution type.
        create_output_file(network_user,
                           backup_root,
                           output_format,
                           "q",
                           post)

    query = QuestionsOnUsersParameters(
        ids=[network_user.user_id],
        site=network_user.site_domain_name,
        filter=question_filter,
    )
    for question in api.questions_on_users(query):
        if question.question_id is not None:
            network_user.user_question_ids.add(question.question_id)
        save(question)
        for answer in question.answers or []:
            save(answer)
ids=list(not_my_question_ids), + site=network_user.site_domain_name, + filter=f, + ) + ) + for question in questions: + create_output_file(network_user, + backup_root, + output_format, + "a", + question) + for answer in question.answers or []: + create_output_file(network_user, + backup_root, + output_format, + "a", + answer) + + +def create_output_file(network_user: NetworkUserInfo, + backup_root: str | PathLike[str], + output_format: OutputFormat, + contribution_type: Literal["a", "q"], + post: Question | Answer) -> None: + output_file = get_output_path(network_user, + backup_root, + output_format, + contribution_type, + post) + output_file.parent.mkdir(parents=True, exist_ok=True) + with output_file.open(mode="w", encoding="utf-8", newline="") as f: + match output_format: + case "markdown": + post_dict = query_converter.unstructure(post) + match post: + case Question(): + frontmatter = metadata_converter.structure( + post_dict, + QuestionMetadata, + ) + case Answer(): + frontmatter = metadata_converter.structure( + post_dict, + AnswerMetadata, + ) + if frontmatter: + metadata_converter.dumps(frontmatter, f) + if post.body_markdown: + f.write(post.body_markdown) + case "json": + f.write(query_converter.dumps(post, indent=2)) + + +def get_output_path(network_user: NetworkUserInfo, + backup_root: str | PathLike[str], + output_format: OutputFormat, + contribution_type: Literal["a", "q"], + post: Question | Answer) -> Path: + out_dir = (Path(backup_root, + network_user.site_domain_name, + contribution_type, + str(post.question_id)) + .resolve()) + out_dir.relative_to(backup_root) + match output_format: + case "markdown": + suffix = ".md" + case "json": + suffix = ".json" + match post: + case Question(): + basename = "index" + case Answer(): + basename = str(post.answer_id) + output_file = Path(out_dir, basename).with_suffix(suffix) + return output_file + + +if __name__ == "__main__": + main() diff --git a/src/stackexchange/generated/_model_openapi.py 
b/src/stackexchange/generated/_model_openapi.py new file mode 100644 index 0000000..65ddb68 --- /dev/null +++ b/src/stackexchange/generated/_model_openapi.py @@ -0,0 +1,703 @@ +# generated by datamodel-codegen: +# filename: openapi.yaml + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Literal + +from attr import dataclass + + +@dataclass(kw_only=True) +class Answer: + """ + This type represents an answer to a question on one of the Stack Exchange sites, such as this famous answer of bobince's. + + As on the question page, it is possible to fetch the comments on an answer as part of a call; though this is not done by default. + + The upvoted, downvoted, and accepted fields can only be queried for with an access_token with the private_info scope. + + """ + + accepted: bool | None = None + answer_id: int | None = None + """ + refers to an Answer + """ + awarded_bounty_amount: int | None = None + awarded_bounty_users: list[ShallowUser] | None = None + body: str | None = None + """ + unchanged in unsafe filters + """ + body_markdown: str | None = None + can_comment: bool | None = None + can_edit: bool | None = None + can_flag: bool | None = None + can_suggest_edit: bool | None = None + collectives: list[Collective] | None = None + comment_count: int | None = None + comments: list[Comment] | None = None + community_owned_date: int | None = None + content_license: str | None = None + creation_date: int | None = None + down_vote_count: int | None = None + downvoted: bool | None = None + """ + private_info + """ + is_accepted: bool | None = None + last_activity_date: int | None = None + last_edit_date: int | None = None + last_editor: ShallowUser | None = None + link: str | None = None + """ + unchanged in unsafe filters + """ + locked_date: int | None = None + owner: ShallowUser | None = None + posted_by_collectives: list[Collective] | None = None + question_id: int | None = None + """ + refers to a Question + """ + 
@dataclass(kw_only=True)
class BadgeCount:
    """
    This type represents the total Badges, segregated by rank, a user has earned.

    Every count is optional and defaults to ``None``.

    """

    # Number of bronze-rank badges.
    bronze: int | None = None
    # Number of gold-rank badges.
    gold: int | None = None
    # Number of silver-rank badges.
    silver: int | None = None
@dataclass(kw_only=True)
class Collective:
    """
    This type represents a collective on a Stack Exchange site.

    Every field is optional and defaults to ``None``.

    """

    description: str | None = None
    # External links of the collective (see CollectiveExternalLink).
    external_links: list[CollectiveExternalLink] | None = None
    link: str | None = None
    name: str | None = None
    # presumably the URL-friendly identifier of the collective -- TODO confirm
    slug: str | None = None
    tags: list[str] | None = None
+ + """ + + body: str | None = None + """ + unchanged in unsafe filters + """ + body_markdown: str | None = None + can_flag: bool | None = None + comment_id: int | None = None + """ + refers to a Comment + """ + content_license: str | None = None + creation_date: int | None = None + edited: bool | None = None + link: str | None = None + """ + unchanged in unsafe filters + """ + owner: ShallowUser | None = None + post_id: int | None = None + """ + refers to a Post + """ + post_type: Literal["question", "answer", "article"] | None = None + reply_to_user: ShallowUser | None = None + score: int | None = None + upvoted: bool | None = None + """ + private_info + """ + + +@dataclass(kw_only=True) +class Complex: + sort: str | Literal["activity", "creation", "votes"] | None = None + min: int | None = None + max: int | None = None + fromdate: int | None = None + """ + Defines the lower bound of creation_date. + """ + todate: int | None = None + """ + Defines the upper bound of creation_date. + """ + order: Literal["asc", "desc"] | None = None + """ + Omitted in the documentation, but is always found to accompany other properties in the group. + """ + + +@dataclass(kw_only=True) +class CreateFilterParametersQuery: + filter: str | None = None + """ + Filters allow applications to reduce API responses to just the fields they are concerned with, + saving bandwidth. + + """ + include: list[str] | None = None + exclude: list[str] | None = None + base: str | None = None + unsafe: bool | None = False + + +@dataclass(kw_only=True) +class Error: + """ + This type is used to describe the errors that can be returned by the API. + + It is not expected that many applications will concern themselves with this type. It is made available for development and testing purposes. + + Note that the field-name description for each record returned differs from the field-name error_message that is part of the common response wrapper on individual API requests. 
@dataclass(kw_only=True)
class Filter:
    """
    This type describes a filter on the API.

    When passing a filter to methods in the API, it should be referred to by name alone.

    Every field is optional and defaults to ``None``.

    """

    # presumably the filter's name as passed to API methods -- TODO confirm
    filter: str | None = None
    # One of "safe", "unsafe" or "invalid".
    filter_type: Literal["safe", "unsafe", "invalid"] | None = None
    # Names of the fields the filter includes.
    included_fields: list[str] | None = None
@dataclass(kw_only=True)
class Paging:
    """
    Pagination parameters: which page to request and how many items per page.

    Defaults to page 1 with 30 items per page; either value may be set to None.
    """

    # 1-based by default; the string below records how its bounds were found.
    page: int | None = 1
    """
    Minimum and maximum values determined from testing.
    """
    # Items per page; see the string below for the limits observed on the API side.
    pagesize: int | None = 30
    """
    The effective minimum and maximum are, respectively, -2^31 and 100,
    with the sole exception of the /sites route for which the effective maximum is 2^31 - 2,
    beyond which a bad_parameter error is returned.
    Negative integers are treated as 0.

    """
+ + This type is heavily inspired by the question page itself, and can optionally return comments and answers accordingly. + + The upvoted, downvoted, and favorited fields can only be queried for with an access_token with the private_info scope. + + """ + + accepted_answer_id: int | None = None + answer_count: int | None = None + answers: list[Answer] | None = None + body: str | None = None + """ + unchanged in unsafe filters + """ + body_markdown: str | None = None + bounty_amount: int | None = None + bounty_closes_date: int | None = None + bounty_user: ShallowUser | None = None + can_answer: bool | None = None + can_close: bool | None = None + can_comment: bool | None = None + can_edit: bool | None = None + can_flag: bool | None = None + can_suggest_edit: bool | None = None + close_vote_count: int | None = None + closed_date: int | None = None + closed_details: ClosedDetails | None = None + closed_reason: str | None = None + collectives: list[Collective] | None = None + comment_count: int | None = None + comments: list[Comment] | None = None + community_owned_date: int | None = None + content_license: str | None = None + creation_date: int | None = None + delete_vote_count: int | None = None + down_vote_count: int | None = None + downvoted: bool | None = None + """ + private_info + """ + favorite_count: int | None = None + favorited: bool | None = None + """ + private_info + """ + is_answered: bool | None = None + last_activity_date: int | None = None + last_edit_date: int | None = None + last_editor: ShallowUser | None = None + link: str | None = None + """ + unchanged in unsafe filters + """ + locked_date: int | None = None + migrated_from: MigrationInfo | None = None + migrated_to: MigrationInfo | None = None + notice: Notice | None = None + owner: ShallowUser | None = None + posted_by_collectives: list[Collective] | None = None + protected_date: int | None = None + question_id: int | None = None + """ + refers to a Question + """ + reopen_vote_count: int | 
@dataclass(kw_only=True)
class QuestionsByIdsParametersQuery:
    """
    Query-string part of the parameters for fetching questions by their ids.

    ``QuestionsByIdsParameters`` extends this class with the ``ids`` path parameter.
    All fields are optional and default to None.
    """

    site: str | None = None
    """
    Per-site methods operates on a single site at a time, identified by the site parameter.
    This parameter can be the full domain name (ie. "stackoverflow.com"),
    or a short form identified by api_site_parameter on the Site object.

    """
    filter: str | None = None
    """
    Filters allow applications to reduce API responses to just the fields they are concerned with,
    saving bandwidth.

    """
    # Nested groups; their members end up as top-level keys when the query is flattened.
    paging: Paging | None = None
    complex: Complex | None = None
@dataclass(kw_only=True)
class ResponseWrapper:
    """
    Untyped form of the wrapper that surrounds the items of an API response.

    ``items`` is kept as a list of plain dicts here. Every field is optional and defaults to None.
    """

    backoff: int | None = None
    error_id: int | None = None
    """
    refers to an Error
    """
    error_message: str | None = None
    error_name: str | None = None
    has_more: bool | None = None
    items: list[dict[str, Any]] | None = None
    """
    an array of the type found in type
    """
    page: int | None = None
    page_size: int | None = None
    quota_max: int | None = None
    quota_remaining: int | None = None
    total: int | None = None
    # Shadows the ``type`` builtin inside the class body only.
    type: str | None = None
@dataclass(kw_only=True)
class SitesParametersQuery:
    """
    Query-string part of the parameters for listing sites.

    ``SitesParameters`` subclasses this without adding any field.
    Both fields are optional and default to None.
    """

    filter: str | None = None
    """
    Filters allow applications to reduce API responses to just the fields they are concerned with,
    saving bandwidth.

    """
    paging: Paging | None = None
+ + """ + + link_color: str | None = None + tag_background_color: str | None = None + tag_foreground_color: str | None = None diff --git a/src/stackexchange/model.py b/src/stackexchange/model.py new file mode 100644 index 0000000..ccbc77e --- /dev/null +++ b/src/stackexchange/model.py @@ -0,0 +1,344 @@ +# ruff: noqa: F722 +# pylint: disable=too-few-public-methods +import datetime +import math +from abc import ABCMeta +from typing import ( + ClassVar, + dataclass_transform, + get_args, +) + +from attr import attrib +from attrs import ( + Converter, + define, + field, + make_class, +) +from urllib3.util import Url, parse_url + +# noinspection PyProtectedMember +from stackexchange.generated._model_openapi import * + +__all__ = [ + # Component schemas (re-exported from .generated._model_openapi) + ## Top level types + "Answer", + "Collective", + "Comment", + "Error", + "Filter", + "NetworkUser", + "Question", + "Site", + ## Member types + "BadgeCount", + "ClosedDetails", + "CollectiveExternalLink", + "CollectiveRecommendation", + "MigrationInfo", + "NetworkPost", + "Notice", + "OriginalQuestion", + "RelatedSite", + "ShallowUser", + "Styling", + # Component parameters (re-exported from .generated._model_openapi) + "Paging", + "Complex", + # Types + "BuiltInFilter", + "BakedInFilter", + # Generic response + "Response", + # Parameters + "Parameters", + "QuestionsByIdsParameters", + "AnswersOnUsersParameters", + "QuestionsOnUsersParameters", + "SimulateErrorParameters", + "CreateFilterParameters", + "ReadFilterParameters", + "SitesParameters", + "AssociatedUsersParameters", + # YAML frontmatter metadata + "Metadata", + "ShallowUserMetadata", + "CommentMetadata", + "AnswerMetadata", + "QuestionMetadata", +] + +type BuiltInFilter = Literal[ + "default", + "withbody", + "none", + "total", +] +type BakedInFilter = Literal[ + "!-0ttWpKaHtrB(oS", + "!2SUoF4c)sOul00Zq", + "r8cwHZB3p97RraWJSdBqs7HCWXUCebDx9Wuhn_ChbmNDTEZ3_1lkd3suiMKEh6U-zwe.EML1(4mmULGTB", + "!6aC-iR(QLBu-5SKm", 
# https://meta.stackexchange.com/q/411264
def guess_content_license_from_publication_date(lic: str | None, obj) \
        -> str | None:
    """
    Return ``lic`` as given, or a guess derived from the dates found on ``obj``.

    A guess is only attempted when ``lic`` is None. The publication date is
    ``obj.last_edit_date`` when that is truthy, otherwise ``obj.creation_date``.
    The name of the license option whose date range contains that date is
    returned with a trailing "?" to mark it as a guess. When no date is
    available, or no option matches, None is returned.
    """
    if lic is not None:
        return lic

    published = getattr(obj, "last_edit_date", None)
    if not published:
        published = getattr(obj, "creation_date", None)
    if not published:
        return lic

    guess = lic
    for option in content_license_options.values():
        in_effect = option.starting_date <= published < option.ending_date
        if in_effect:
            guess = option.name + "?"
    return guess
kwds["metadata"][Parameters.VECTOR_LIMIT_KEY] = vector_limit + return field(**kwds) + + +@parameters +class QuestionsByIdsParameters(QuestionsByIdsParametersQuery): + ids: list[int] = path_param(vector_limit=Parameters.DEFAULT_VECTOR_LIMIT) + + +@parameters +class AnswersOnUsersParameters(AnswersOnUsersParametersQuery): + ids: list[int] = path_param(vector_limit=Parameters.DEFAULT_VECTOR_LIMIT) + + +@parameters +class QuestionsOnUsersParameters(QuestionsOnUsersParametersQuery): + ids: list[int] = path_param(vector_limit=Parameters.DEFAULT_VECTOR_LIMIT) + + +@parameters +class SimulateErrorParameters(SimulateErrorParametersQuery): + id: int = path_param() + + +@parameters +class CreateFilterParameters(CreateFilterParametersQuery): + pass + + +@parameters +class ReadFilterParameters(ReadFilterParametersQuery): + filters: list[str] = path_param(vector_limit=20) + + +@parameters +class SitesParameters(SitesParametersQuery): + pass + + +@parameters +class AssociatedUsersParameters(AssociatedUsersParametersQuery): + ids: list[int] = path_param(vector_limit=Parameters.DEFAULT_VECTOR_LIMIT) + + +# endregion + +# region YAML frontmatter metadata + + +class Metadata(metaclass=ABCMeta): + pass + + +@dataclass_transform(kw_only_default=True, + frozen_default=True, + field_specifiers=(attrib, field)) +def metadata[T](cls: type[T]) -> type[T]: + return Metadata.register(define(cls, frozen=True, kw_only=True)) + + +@metadata +class ShallowUserMetadata: + display_name: str | None = None + user_type: str | None = None + reputation: int | None = None + link: str | None = None + + +@metadata +class CommentMetadata: + score: int | None = None + creation_date: int | None = None + content_license: str | None = None + link: str | None = None + owner: ShallowUserMetadata | None = None + body_markdown: str | None = None + + +@metadata +class AnswerMetadata: + is_accepted: bool | None = None + awarded_bounty_amount: int | None = None + score: int | None = None + up_vote_count: int | None = 
@metadata
class QuestionMetadata:
    """
    The question fields that are kept as metadata.

    ``@metadata`` turns this into a frozen, keyword-only attrs class that is
    registered as a virtual subclass of ``Metadata``. Every field is optional
    and defaults to None.
    """

    # NOTE(review): the declaration order presumably determines the key order
    # of the dumped output -- confirm before reordering.
    title: str | None = None
    tags: list[str] | None = None
    view_count: int | None = None
    score: int | None = None
    up_vote_count: int | None = None
    down_vote_count: int | None = None
    owner: ShallowUserMetadata | None = None
    # ``int | None`` fields whose name ends in "_date" are unstructured through
    # epoch_time_to_date_str by metadata_converter.
    creation_date: int | None = None
    last_edit_date: int | None = None
    community_owned_date: int | None = None
    # Unstructured through linkify_content_license by metadata_converter.
    content_license: str | None = None
    share_link: str | None = None
    comments: list[CommentMetadata] | None = None
exclude question.answers as they are split into their own files. + # comment.body is added only to circumvent a bug, it is not needed. + return {k: v for k, v in dct.items() + if v is not None and k not in {"answers", "body"}} + + return wrapper + + +def unstructure_parameters_query(conv: Converter, + params: Parameters) -> dict[str, Any]: + param_keys_to_exclude = [] + # noinspection PyDataclass, PyTypeChecker + for f in attrs.fields(type(params)): + if Parameters.PATH_PARAMETER_KEY in f.metadata: + param_keys_to_exclude.append(f.name) + elif isinstance(param := getattr(params, f.name), list): + setattr(params, f.name, unstructure_as_batched_vectors(param, f)) + params_as_dict: dict[str, Any] = conv.unstructure(params) + return flatten_dict(params_as_dict, + lambda k, v: + k in frozenset(param_keys_to_exclude) + or v is None) + + +def unstructure_as_batched_vectors(param: list[Any], + field_: attrs.Attribute) -> list[str]: + vector_limit = (field_.metadata.get(Parameters.VECTOR_LIMIT_KEY, + Parameters.DEFAULT_VECTOR_LIMIT) + if Parameters.PATH_PARAMETER_KEY in field_.metadata + else len(param)) + return [ + ";".join(str(e) for e in param[i:i + vector_limit]) + for i in range(0, len(param), vector_limit) + ] + + +# https://www.freecodecamp.org/news/how-to-flatten-a-dictionary-in-python-in-4-different-ways/ +def flatten_dict(dct: dict[str, Any], + exclude_func: Callable[[str, Any], bool]) -> dict[str, Any]: + return dict(flatten_dict_generator(dct, exclude_func)) + + +def flatten_dict_generator(dct: dict[str, Any], + exclude_func: Callable[[str, Any], bool]) \ + -> Generator[tuple[str, Any], None, None]: + for k, v in dct.items(): + if exclude_func(k, v): + continue + if isinstance(v, dict): + yield from flatten_dict(v, exclude_func).items() + else: + yield k, v + + +def structure_query_response[T](conv: Converter, + obj: dict[str, Any], + typ: type[Response[T]]) -> Response[T]: + response_type = get_args(typ)[0] + # noinspection PyTypeHints + items_structure_hook = 
conv.get_structure_hook(list[response_type]) + items = obj.pop("items") + response = Response(**obj) + # noinspection PyTypeHints + response.items = items_structure_hook(items, list[response_type]) + return response + + +# noinspection PyArgumentList +_json_converter = cattrs.preconf.json.make_converter(forbid_extra_keys=True) +_json_converter.register_unstructure_hook_factory( + attrs.has, + lambda cl: make_dict_unstructure_fn(cl, _json_converter) +) +_json_converter.register_structure_hook_factory( + attrs.has, + lambda cl: make_dict_structure_fn(cl, _json_converter) +) +query_converter = _json_converter.copy() +query_converter.register_unstructure_hook_factory( + attrs.has, + lambda cl: exclude_unneeded_keys( + make_dict_unstructure_fn(cl, query_converter) + ) +) +query_converter.register_unstructure_hook_func( + lambda cl: attrs.has(cl) and issubclass(cl, Parameters), + functools.partial(unstructure_parameters_query, _json_converter) +) +query_converter.register_structure_hook_func( + lambda cl: is_generic(cl) and get_origin(cl) is Response, + functools.partial(structure_query_response, query_converter) +) + + +# endregion + +# region Metadata converter + +# RuamelyamlConverter and make_yaml_converter are adjusted from +# the code in the cattrs.preconf.pyyaml module. +class RuamelyamlConverter[T](Converter): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.yaml = self.init_yaml() + + @classmethod + def init_yaml(cls) -> YAML: + # Always use the pure Python implementation + # to ensure more consistent behaviors across different environments. + return YAML(typ="rt", pure=True) + + # https://yaml.dev/doc/ruamel.yaml/api/#top + # If a parse or dump fails, and throws an exception, + # the state of the YAML() instance is not guaranteed + # to be able to handle further processing. + # We should, at that point, recreate the YAML instance before proceeding. 
+ + def dumps(self, + obj: Any, + stream: Any | None = None, + unstructure_as: Any = None, + **kwargs: Any) -> Any: + try: + if "transform" not in kwargs: + kwargs["transform"] = lambda s: f"---\n{s}---\n" + return self.yaml.dump( + self.unstructure(obj, unstructure_as=unstructure_as), + stream, + **kwargs + ) + except Exception as e: + self.yaml = self.init_yaml() + raise e + + def loads(self, data: str, cl: type[T]) -> T: + try: + return self.structure(self.yaml.load(data), cl) + except Exception as e: + self.yaml = self.init_yaml() + raise e + + +@wrap(RuamelyamlConverter) +def make_yaml_converter(*args: Any, **kwargs: Any) -> RuamelyamlConverter: + kwargs["unstruct_collection_overrides"] = { + frozenset: list, + **kwargs.get("unstruct_collection_overrides", {}), + } + res = RuamelyamlConverter(*args, **kwargs) + cattrs.preconf.pyyaml.configure_converter(res) + return res + + +def epoch_time_to_date_str(seconds_since_epoch: int | None) -> str | None: + if seconds_since_epoch is not None: + try: + return (datetime.datetime + .fromtimestamp(seconds_since_epoch, tz=datetime.UTC) + .strftime("%Y-%m-%dT%H:%M:%SZ")) + except (OverflowError, ValueError, OSError): + return f"{seconds_since_epoch} seconds since the Unix epoch" + return None + + +def linkify_content_license(content_license: str | None) -> str | None: + if (content_license is not None + and (applicable_license := content_license_options + .get(content_license.removesuffix("?")))): + return (f"[{applicable_license.name}]({applicable_license.url})" + + ("?" 
# Converter used for Metadata classes. Both hook factories below only apply
# to attrs classes that are (virtual) subclasses of Metadata.
metadata_converter = make_yaml_converter()
metadata_converter.register_unstructure_hook_factory(
    lambda cl: attrs.has(cl) and issubclass(cl, Metadata),
    # The generated unstructure function is wrapped so that None values and
    # the "answers" / "body" keys are left out of the resulting dict.
    lambda cl: exclude_unneeded_keys(
        make_dict_unstructure_fn(
            cl,
            metadata_converter,
            # One override per field. The first matching rule wins:
            #   1. name ends in "_date" and the type is int | None
            #      -> epoch_time_to_date_str
            #   2. name is "content_license" -> linkify_content_license
            #   3. CommentMetadata.body_markdown -> LiteralScalarString,
            #      or None for an empty/None body
            #   4. anything else -> no custom hook (unstruct_hook=None)
            # NOTE(review): the set in rule 1 presumably covers both the
            # string and the evaluated form of the annotation -- confirm.
            **{  # pyright: ignore [reportArgumentType]
                field.name: override(
                    unstruct_hook=epoch_time_to_date_str
                    if field.name.endswith("_date")
                    and field.type in {"int | None", int | None}
                    else linkify_content_license
                    if field.name == "content_license"
                    else (lambda s: LiteralScalarString(s) if s else None)
                    if cl is CommentMetadata
                    and field.name == "body_markdown"
                    else None,
                )
                for field in attrs.fields(cl)
            }
        )
    )
)
# Structuring uses the plain generated function, without per-field overrides.
metadata_converter.register_structure_hook_factory(
    lambda cl: attrs.has(cl) and issubclass(cl, Metadata),
    lambda cl: make_dict_structure_fn(cl, metadata_converter)
)
# An easier alternative to https://api.stackexchange.com/docs/create-filter
# for writing complex filters from scratch.
def test_create_filter_method_post(api):
    """
    Create a filter through the API with an HTTP POST and pretty-print it.

    There is no assertion here: the test passes as long as the call and the
    unstructuring do not raise. Edit the include/exclude lists below to
    compose a new filter and read the result from the printed output.
    """
    created_filter = api.create_filter(
        CreateFilterParameters(
            include=[
                # You should almost always include these fields in the filter:
                ".backoff",
                ".has_more",
                ".items",
                ".quota_remaining",
                # Specify the rest of your include fields below:
            ],
            exclude=[
                # You should almost always exclude these fields in the filter:
                ".total",
                # Specify the rest of your exclude fields below:
            ],
            # Start from the built-in "none" filter and add to it.
            base="none",
        ),
        http_method=HTTPMethod.POST,
    )
    # sort_dicts=False keeps the keys in the order they were produced.
    pprint(query_converter.unstructure(created_filter),
           indent=2,
           sort_dicts=False)
@pytest.fixture(scope="module", name="custom_filters")
def custom_filters_fixture():
    """
    Read every baked-in filter back from the API.

    The result is sorted by the filter string (a missing one sorts as "").
    """
    client = StackExchangeApi()
    # pylint: disable=no-member
    filter_names = list(get_args(BakedInFilter.__value__))
    fetched = client.read_filter(ReadFilterParameters(filters=filter_names))
    return sorted(fetched, key=lambda item: item.filter or "")
@pytest.mark.parametrize(
    ("custom", "defined"),
    [("custom_filters", "defined_filters")],
)
def test_filters(custom, defined, request):
    """
    The filters read back from the API must equal the ones defined locally.

    ``custom`` and ``defined`` are fixture names that get resolved lazily
    through ``request``.
    """
    assert request.getfixturevalue(custom) == request.getfixturevalue(defined)
StackExchangeApi +# noinspection PyProtectedMember +from stackexchange.model import ( + Comment, + Paging, + Parameters, + Question, + QuestionMetadata, + ShallowUser, + SitesParameters, + parameters, + path_param, +) +# noinspection PyProtectedMember +from stackexchange.serdes import ( + metadata_converter, + query_converter, + unstructure_as_batched_vectors, +) + + +@pytest.fixture(scope="module", name="api") +def api_fixture(): + return StackExchangeApi() + + +@pytest.fixture(scope="module", name="params") +def params_fixture(): + Parameters.DEFAULT_VECTOR_LIMIT = 4 + + # pylint: disable=too-few-public-methods + @parameters + class CustomParameters: + p_vector: list[int] = path_param(vector_limit=2) + p_vector_default: list[int] = path_param() + p_vector_invalid: int = path_param(vector_limit=5) + p_primitive: str = path_param() + q_vector: list[str] | None = attrs.field( + default=None, + metadata={Parameters.VECTOR_LIMIT_KEY: 3} + ) + q_vector_default: list[str] | None = None + q_primitive: str | None = None + q_none: int | None = None + q_nested: Paging | None = None + + params = CustomParameters( + p_vector=[1, 2, 3], + p_vector_default=[1, 2, 3, 4, 5, 6, 7, 8, 9], + p_vector_invalid=4, + p_primitive="tag", + q_vector=["a", "b", "c", "d"], + q_vector_default=["e", "f", "g", "h", "i"], + q_primitive="f", + q_nested=Paging(page=2, pagesize=7), + ) + return params + + +def test_parameters_post_init(api): + page_size = 2 ** 31 - 2 + assert Parameters.MAX_PAGE_SIZE < page_size + with pytest.raises(ValueError): + SitesParameters(paging=Paging(pagesize=page_size)) + with attrs.validators.disabled(): + params = SitesParameters(paging=Paging(pagesize=page_size)) + # The following demo code is outside the scope of this test module, + # and should therefore not affect the outcome of this test case, + # even though the assertion statement is expected to pass. 
+ try: + sites = next(api.sites(params, + auto_pagination=False, + items_only=False)) + assert ((paging := params.paging) is not None + and (pagesize := paging.pagesize) is not None + and Parameters.MAX_PAGE_SIZE < len(sites.items) < pagesize) + except Exception as e: # pylint: disable=broad-exception-caught + warnings.warn(UserWarning(f"sample code unexpectedly failed:\n{e}")) + + +def test_path_param_unstructure(params): + # noinspection PyDataclass, PyTypeChecker + fields_dict = attrs.fields_dict(type(params)) + for path_param_name, expected in ( + ("p_vector", ["1;2", "3"]), + ("p_vector_default", ["1;2;3;4", "5;6;7;8", "9"]), + ("p_vector_invalid", 4), + ("p_primitive", "tag"), + ): + path_param_ = getattr(params, path_param_name) + actual = (unstructure_as_batched_vectors(path_param_, + fields_dict[path_param_name]) + if isinstance(path_param_, list) + else path_param_) + assert actual == expected + + +def test_query_converter_unstructure(params): + actual = query_converter.unstructure(params) + expected = { + "q_vector": ["a;b;c;d"], + "q_vector_default": ["e;f;g;h;i"], + "q_primitive": "f", + "page": 2, + "pagesize": 7, + } + assert actual == expected + + +def test_metadata_converter_dumps(): + question = Question( + title="abc", + tags=["c", "c++"], + owner=ShallowUser( + user_type="registered", + display_name="anon", + ), + score=3, + creation_date=1295365453, + last_edit_date=4223295365453, + comments=[ + Comment( + creation_date=1246435483, + content_license="CC BY-SA 2.5", + body_markdown="No comment.", + ), + ], + ) + question_dict = query_converter.unstructure(question) + question_metadata = metadata_converter.structure(question_dict, + QuestionMetadata) + with io.StringIO() as output: + metadata_converter.dumps(question_metadata, output) + actual = output.getvalue() + expected = """--- +title: abc +tags: +- c +- c++ +score: 3 +owner: + display_name: anon + user_type: registered +creation_date: '2011-01-18T15:44:13Z' +last_edit_date: 4223295365453 
seconds since the Unix epoch +content_license: '[CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)?' +comments: +- creation_date: '2009-07-01T08:04:43Z' + content_license: '[CC BY-SA 2.5](https://creativecommons.org/licenses/by-sa/2.5/)' + body_markdown: |- + No comment. +--- +""" + assert actual == expected