diff --git a/.github/workflows/SimPathsBuild.yml b/.github/workflows/SimPathsBuild.yml index db20a7574..cc340f2f5 100644 --- a/.github/workflows/SimPathsBuild.yml +++ b/.github/workflows/SimPathsBuild.yml @@ -22,7 +22,6 @@ jobs: with: java-version: '19' distribution: 'temurin' - cache: maven - name: Build with Maven run: mvn -B package --file pom.xml - uses: actions/upload-artifact@v4 @@ -39,7 +38,6 @@ jobs: with: java-version: '19' distribution: 'temurin' - cache: maven - name: Run integration tests run: mvn verify - name: Upload Actual CSVs for Inspection diff --git a/docs/figures/Chart Properties.png b/documentation/figures/GUI/Chart Properties.png similarity index 100% rename from docs/figures/Chart Properties.png rename to documentation/figures/GUI/Chart Properties.png diff --git a/docs/figures/Charts.png b/documentation/figures/GUI/Charts.png similarity index 100% rename from docs/figures/Charts.png rename to documentation/figures/GUI/Charts.png diff --git a/docs/figures/Output stream.png b/documentation/figures/GUI/Output stream.png similarity index 100% rename from docs/figures/Output stream.png rename to documentation/figures/GUI/Output stream.png diff --git a/docs/figures/SimPaths GUI.png b/documentation/figures/GUI/SimPaths GUI.png similarity index 100% rename from docs/figures/SimPaths GUI.png rename to documentation/figures/GUI/SimPaths GUI.png diff --git a/docs/figures/SimPaths parameters.png b/documentation/figures/GUI/SimPaths parameters.png similarity index 100% rename from docs/figures/SimPaths parameters.png rename to documentation/figures/GUI/SimPaths parameters.png diff --git a/docs/figures/SimPaths-Buttons.png b/documentation/figures/GUI/SimPaths-Buttons.png similarity index 100% rename from docs/figures/SimPaths-Buttons.png rename to documentation/figures/GUI/SimPaths-Buttons.png diff --git a/docs/figures/SimPaths-Chart-Zoom.png b/documentation/figures/GUI/SimPaths-Chart-Zoom.png similarity index 100% rename from 
docs/figures/SimPaths-Chart-Zoom.png rename to documentation/figures/GUI/SimPaths-Chart-Zoom.png diff --git a/documentation/figures/disability_module.png b/documentation/figures/modules/disability_module.png similarity index 100% rename from documentation/figures/disability_module.png rename to documentation/figures/modules/disability_module.png diff --git a/documentation/figures/education_module.png b/documentation/figures/modules/education_module.png similarity index 100% rename from documentation/figures/education_module.png rename to documentation/figures/modules/education_module.png diff --git a/documentation/figures/family_composition_module.png b/documentation/figures/modules/family_composition_module.png similarity index 100% rename from documentation/figures/family_composition_module.png rename to documentation/figures/modules/family_composition_module.png diff --git a/documentation/figures/fertility_module.png b/documentation/figures/modules/fertility_module.png similarity index 100% rename from documentation/figures/fertility_module.png rename to documentation/figures/modules/fertility_module.png diff --git a/documentation/figures/health_module.png b/documentation/figures/modules/health_module.png similarity index 100% rename from documentation/figures/health_module.png rename to documentation/figures/modules/health_module.png diff --git a/documentation/figures/mental_health_cases_module.png b/documentation/figures/modules/mental_health_cases_module.png similarity index 100% rename from documentation/figures/mental_health_cases_module.png rename to documentation/figures/modules/mental_health_cases_module.png diff --git a/documentation/figures/mental_health_levels_module.png b/documentation/figures/modules/mental_health_levels_module.png similarity index 100% rename from documentation/figures/mental_health_levels_module.png rename to documentation/figures/modules/mental_health_levels_module.png diff --git a/documentation/figures/partnership_module.png 
b/documentation/figures/modules/partnership_module.png similarity index 100% rename from documentation/figures/partnership_module.png rename to documentation/figures/modules/partnership_module.png diff --git a/documentation/figures/union_matching_module.png b/documentation/figures/modules/union_matching_module.png similarity index 100% rename from documentation/figures/union_matching_module.png rename to documentation/figures/modules/union_matching_module.png diff --git a/documentation/wiki/assets/css/extra.css b/documentation/wiki/assets/css/extra.css new file mode 100644 index 000000000..04fef6eca --- /dev/null +++ b/documentation/wiki/assets/css/extra.css @@ -0,0 +1,636 @@ +/* ═══════════════════════════════════════════════ + SimPaths Documentation — Custom Theme + Refined academic aesthetic +═══════════════════════════════════════════════ */ + +/* ── Self-hosted fonts (zero external requests) ── */ +@font-face { + font-family: "Inter"; + src: url("../fonts/Inter.woff2") format("woff2"); + font-weight: 100 900; + font-display: swap; +} + +@font-face { + font-family: "JetBrains Mono"; + src: url("../fonts/JetBrainsMono.woff2") format("woff2"); + font-weight: 100 800; + font-display: swap; +} + +:root { + --md-text-font: "Inter"; + --md-code-font: "JetBrains Mono"; +} + +/* ── CSS Variables ── */ +:root { + --sp-primary: #1e3050; + --sp-accent: #2478b5; + --sp-accent-light: #3b9ede; + --sp-accent-bright: #4fb3f0; + --sp-gradient: linear-gradient(135deg, #162540 0%, #1e3358 55%, #1a3550 100%); + --sp-hero-gradient: linear-gradient(135deg, #0f1c34 0%, #1e3358 50%, #163050 100%); + --sp-card-shadow: 0 4px 20px rgba(20,40,70,0.10); + --sp-transition: all 0.2s ease; +} + +/* ═══════════════════════════════════════════════ + MATERIAL THEME PALETTE +═══════════════════════════════════════════════ */ + +[data-md-color-primary="custom"] { + --md-primary-fg-color: #1e3050; + --md-primary-fg-color--light: #2a4570; + --md-primary-fg-color--dark: #12203a; + 
--md-primary-bg-color: #ffffff; + --md-primary-bg-color--light: rgba(255,255,255,0.7); +} + +[data-md-color-accent="custom"] { + --md-accent-fg-color: #2478b5; + --md-accent-fg-color--transparent: rgba(36,120,181,0.1); + --md-accent-bg-color: #ffffff; + --md-accent-bg-color--light: rgba(255,255,255,0.7); +} + +/* ═══════════════════════════════════════════════ + DARK MODE +═══════════════════════════════════════════════ */ + +[data-md-color-scheme="slate"] { + --md-default-bg-color: #111c2e; + --md-default-bg-color--light: #1a2940; + --md-default-bg-color--lighter: #223450; + --md-default-bg-color--lightest:#2c3f5c; + --md-code-bg-color: #1a2940; + --md-default-fg-color: rgba(255,255,255,0.92); + --md-default-fg-color--light: rgba(255,255,255,0.72); + --md-default-fg-color--lighter: rgba(255,255,255,0.48); + --md-default-fg-color--lightest:rgba(255,255,255,0.14); +} + +[data-md-color-scheme="slate"] .md-typeset a { + color: #5cb8f0 !important; +} +[data-md-color-scheme="slate"] .md-typeset a:hover { + color: #8dd0f7 !important; +} + +[data-md-color-scheme="slate"] .md-nav__item--active > .md-nav__link { + color: #5cb8f0 !important; +} +[data-md-color-scheme="slate"] .md-nav__link:hover { + color: #8dd0f7 !important; +} +[data-md-color-scheme="slate"] .md-nav__link--active { + color: #5cb8f0 !important; +} + +[data-md-color-scheme="slate"] .md-typeset .feature-card { + background: #1a2940; + border-color: rgba(255,255,255,0.06); +} +[data-md-color-scheme="slate"] .md-typeset .feature-card a { + color: #5cb8f0 !important; +} + +[data-md-color-scheme="slate"] .md-typeset .hero { + background: linear-gradient(135deg, #0c1624 0%, #162845 50%, #103048 100%); +} + +[data-md-color-scheme="slate"] .md-typeset .country-tag { + background: rgba(255,255,255,0.08); + color: rgba(255,255,255,0.78); +} + +[data-md-color-scheme="slate"] .md-typeset table:not([class]) th { + background: #1e3050; +} + +[data-md-color-scheme="slate"] .md-typeset .highlight { + box-shadow: 0 2px 12px 
rgba(0,0,0,0.3); +} + +[data-md-color-scheme="slate"] .md-typeset blockquote { + background: rgba(36,120,181,0.08); +} + +/* ═══════════════════════════════════════════════ + LINKS +═══════════════════════════════════════════════ */ + +.md-typeset a:not(.md-button):not(.headerlink):not(.md-nav__link):not(.card-link) { + color: var(--sp-accent); + text-decoration: none; + border-bottom: 1px solid transparent; + transition: color 0.15s ease, border-color 0.15s ease; +} + +.md-typeset a:not(.md-button):not(.headerlink):not(.md-nav__link):not(.card-link):hover { + color: var(--sp-primary); + border-bottom-color: var(--sp-primary); +} + +[data-md-color-scheme="slate"] .md-typeset a:not(.md-button):not(.headerlink):not(.md-nav__link):not(.card-link):hover { + color: #8dd0f7 !important; + border-bottom-color: #8dd0f7 !important; +} + +/* ═══════════════════════════════════════════════ + HEADER & NAVIGATION +═══════════════════════════════════════════════ */ + +.md-header { + background: var(--sp-gradient) !important; + box-shadow: 0 2px 16px rgba(20,40,70,0.35); +} + +.md-tabs { + background: rgba(255,255,255,0.05) !important; + border-top: 1px solid rgba(255,255,255,0.08); +} + +.md-tabs__link { + font-weight: 500; + font-size: 0.79rem; + letter-spacing: 0.02em; + opacity: 0.78; + transition: opacity 0.15s ease; +} +.md-tabs__link:hover, +.md-tabs__link--active { + opacity: 1; +} + +/* ═══════════════════════════════════════════════ + SIDEBAR +═══════════════════════════════════════════════ */ + +.md-nav__title { + font-weight: 600; + font-size: 0.68rem; + letter-spacing: 0.06em; + text-transform: uppercase; + color: var(--sp-accent) !important; + padding-bottom: 0.25rem; +} + +.md-nav__item--active > .md-nav__link { + font-weight: 600; + color: var(--sp-accent) !important; +} + +.md-nav__link { + font-size: 0.76rem; + transition: color 0.12s ease; +} + +/* ═══════════════════════════════════════════════ + CONTENT — TYPOGRAPHY 
+═══════════════════════════════════════════════ */ + +.md-content { + max-width: 820px; +} + +.md-typeset { + font-size: 0.82rem; + line-height: 1.6; +} + +.md-typeset h1 { + font-weight: 600; + font-size: 1.45rem; + letter-spacing: -0.015em; + border-bottom: 2px solid transparent; + border-image: var(--sp-gradient) 1; + padding-bottom: 0.3rem; + margin-bottom: 0.8rem; +} + +.md-typeset h2 { + font-weight: 600; + font-size: 1.25rem; + letter-spacing: -0.01em; + margin-top: 1.4rem; + margin-bottom: 0.45rem; + color: var(--md-default-fg-color); +} + +.md-typeset h3 { + font-weight: 600; + font-size: 1.1rem; + margin-top: 1rem; + margin-bottom: 0.3rem; + color: var(--md-default-fg-color); +} + +/* Paragraph justification */ +.md-typeset p { + margin-top: 0; + margin-bottom: 0.75em; + text-align: justify; + hyphens: auto; + -webkit-hyphens: auto; +} + +/* Tighter list spacing */ +.md-typeset ul, +.md-typeset ol { + margin-top: 0.3em; + margin-bottom: 0.7em; +} +.md-typeset ul li, +.md-typeset ol li { + margin-bottom: 0.2em; +} +.md-typeset ul li:last-child, +.md-typeset ol li:last-child { + margin-bottom: 0; +} + +/* Blockquote */ +.md-typeset blockquote { + border-left: 3px solid var(--sp-accent); + background: rgba(36,120,181,0.05); + border-radius: 0 6px 6px 0; + padding: 0.7rem 1.1rem; + margin: 1rem 0; + font-style: italic; + color: var(--md-default-fg-color--light); +} + +/* ═══════════════════════════════════════════════ + CODE BLOCKS +═══════════════════════════════════════════════ */ + +.md-typeset code { + border-radius: 4px; + font-size: 0.78em; + padding: 0.1em 0.35em; + background: rgba(36,120,181,0.07); + color: #1a5a8a; +} + +[data-md-color-scheme="slate"] .md-typeset code { + background: rgba(92,184,240,0.1); + color: #8dd0f7; +} + +.md-typeset pre > code { + font-size: 0.78em; + line-height: 1.65; + background: none; + color: inherit; + padding: 0; +} + +.md-typeset .highlight { + border-radius: 8px !important; + overflow: hidden; + box-shadow: 0 1px 
10px rgba(0,0,0,0.07); + margin: 1em 0; +} + +/* ═══════════════════════════════════════════════ + ADMONITIONS & DETAILS +═══════════════════════════════════════════════ */ + +.md-typeset .admonition, +.md-typeset details { + border-radius: 8px !important; + border-left-width: 4px; + box-shadow: 0 1px 8px rgba(0,0,0,0.05); + font-size: 0.81rem; +} + +.md-typeset .admonition-title, +.md-typeset summary { + font-weight: 600; + font-size: 0.81rem; +} + +/* ═══════════════════════════════════════════════ + TABLES +═══════════════════════════════════════════════ */ + +.md-typeset table:not([class]) { + border-radius: 8px; + overflow: hidden; + box-shadow: 0 1px 8px rgba(0,0,0,0.06); + font-size: 0.84em; + width: 100%; + border-collapse: separate; + border-spacing: 0; +} + +.md-typeset table:not([class]) th { + background: var(--sp-gradient); + color: #fff; + font-weight: 600; + letter-spacing: 0.01em; + padding: 0.6rem 1rem; +} + +.md-typeset table:not([class]) td { + padding: 0.5rem 1rem; + border-bottom: 1px solid var(--md-default-fg-color--lightest); +} + +.md-typeset table:not([class]) tr:last-child td { + border-bottom: none; +} + +.md-typeset table:not([class]) tr:hover td { + background: rgba(36,120,181,0.04); +} + +/* ═══════════════════════════════════════════════ + HERO SECTION (homepage) +═══════════════════════════════════════════════ */ + +.md-typeset .hero { + background: var(--sp-hero-gradient); + border-radius: 14px; + padding: 2.2rem 2.2rem; + margin: 1rem 0 1.5rem; + color: #fff; + position: relative; + overflow: hidden; +} + +.md-typeset .hero::before { + content: ''; + position: absolute; + top: -50px; right: -50px; + width: 280px; height: 280px; + background: rgba(255,255,255,0.03); + border-radius: 50%; + pointer-events: none; +} + +.md-typeset .hero::after { + content: ''; + position: absolute; + bottom: -70px; left: -30px; + width: 220px; height: 220px; + background: rgba(36,120,181,0.08); + border-radius: 50%; + pointer-events: none; +} + 
+.md-typeset .hero h1 { + color: #fff !important; + font-size: 1.95rem; + font-weight: 600; + margin-bottom: 0.45rem; + line-height: 1.2; + border: none; + letter-spacing: -0.02em; +} + +.md-typeset .hero .tagline { + font-size: 0.88rem; + color: rgba(255,255,255,0.8); + margin-bottom: 1.4rem; + font-weight: 400; + max-width: 100%; + line-height: 1.6; + text-align: left; + hyphens: none; + -webkit-hyphens: none; +} + +/* ── Hero badges ── */ +.md-typeset .hero-badges { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + margin-bottom: 1.5rem; +} + +.md-typeset .hero-badge { + background: rgba(255,255,255,0.10); + border: 1px solid rgba(255,255,255,0.18); + border-radius: 999px; + padding: 0.2rem 0.65rem; + font-size: 0.7rem; + color: rgba(255,255,255,0.88); + font-weight: 500; + letter-spacing: 0.01em; +} + +/* ── Hero CTA buttons ── */ +.md-typeset .hero-cta { + display: flex; + gap: 0.7rem; + flex-wrap: wrap; +} + +.md-typeset .hero-cta a { + display: inline-flex; + align-items: center; + gap: 0.3rem; + border-radius: 7px; + padding: 0.45rem 1rem; + font-size: 0.78rem; + font-weight: 600; + text-decoration: none !important; + border-bottom: none !important; + transition: var(--sp-transition); + position: relative; + z-index: 1; +} + +.md-typeset .hero-cta .btn-primary { + background: #fff; + color: var(--sp-primary) !important; + box-shadow: 0 2px 10px rgba(0,0,0,0.15); +} +.md-typeset .hero-cta .btn-primary:hover { + background: #eef4fa; + transform: translateY(-1px); + box-shadow: 0 4px 16px rgba(0,0,0,0.2); +} + +.md-typeset .hero-cta .btn-outline { + background: transparent; + color: #fff !important; + border: 1.5px solid rgba(255,255,255,0.4) !important; +} +.md-typeset .hero-cta .btn-outline:hover { + background: rgba(255,255,255,0.1); + border-color: rgba(255,255,255,0.8) !important; + transform: translateY(-1px); +} + +/* ═══════════════════════════════════════════════ + FEATURE CARDS (homepage) +═══════════════════════════════════════════════ */ + 
+.md-typeset .card-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); + gap: 1rem; + margin: 1.2rem 0; +} + +.md-typeset .feature-card { + background: var(--md-default-bg-color); + border: 1px solid var(--md-default-fg-color--lightest); + border-radius: 10px; + padding: 1.2rem 1.3rem 1rem; + transition: var(--sp-transition); + position: relative; + overflow: hidden; +} + +.md-typeset .feature-card::before { + content: ''; + position: absolute; + top: 0; left: 0; + width: 100%; height: 3px; + background: var(--sp-gradient); + opacity: 0; + transition: opacity 0.2s ease; +} + +.md-typeset .feature-card:hover { + transform: translateY(-3px); + box-shadow: var(--sp-card-shadow); + border-color: rgba(36,120,181,0.25); +} + +.md-typeset .feature-card:hover::before { + opacity: 1; +} + +.md-typeset .feature-card .card-icon { + font-size: 1.3rem; + margin-bottom: 0.35rem; + display: block; + line-height: 1; +} + +.md-typeset .feature-card h3 { + font-size: 0.9rem !important; + font-weight: 600; + margin: 0 0 0.25rem !important; + padding: 0 !important; + color: var(--md-default-fg-color) !important; + border: none !important; +} + +.md-typeset .feature-card p { + font-size: 0.72rem; + color: var(--md-default-fg-color--light); + margin: 0 0 0.4rem; + line-height: 1.5; + text-align: left; +} + +.md-typeset .feature-card a { + font-size: 0.73rem; + font-weight: 600; + color: var(--sp-accent) !important; + text-decoration: none !important; + border-bottom: none !important; + display: inline-flex; + align-items: center; + gap: 0.15rem; + margin-top: 0.2rem; + transition: color 0.12s ease, gap 0.12s ease; +} + +.md-typeset .feature-card a:hover { + color: var(--sp-primary) !important; + gap: 0.25rem; +} + +/* ═══════════════════════════════════════════════ + COUNTRIES STRIP (homepage) +═══════════════════════════════════════════════ */ + +.md-typeset .countries { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + margin: 0.8rem 0; +} + 
+.md-typeset .country-tag { + background: #eef4fa; + color: #1e3050; + border-radius: 6px; + padding: 0.2rem 0.6rem; + font-size: 0.73rem; + font-weight: 500; +} + +/* ═══════════════════════════════════════════════ + FOOTER +═══════════════════════════════════════════════ */ + +.md-footer { + background: #12203a; +} + +.md-footer-meta { + background: rgba(0,0,0,0.18); +} + +.md-footer__link { + opacity: 0.75; + transition: opacity 0.12s ease; +} +.md-footer__link:hover { + opacity: 1; +} + +/* ═══════════════════════════════════════════════ + SEARCH +═══════════════════════════════════════════════ */ + +.md-search__form { + border-radius: 8px; + background: rgba(255,255,255,0.10) !important; + transition: background 0.15s ease; +} +.md-search__form:focus-within { + background: rgba(255,255,255,0.16) !important; +} + +/* ═══════════════════════════════════════════════ + BUTTONS +═══════════════════════════════════════════════ */ + +.md-typeset .md-button { + border-radius: 7px; + font-weight: 600; + font-size: 0.84rem; + transition: var(--sp-transition); +} +.md-typeset .md-button:hover { + transform: translateY(-1px); +} +.md-typeset .md-button--primary { + background: var(--sp-gradient); + border-color: transparent; +} + +/* ═══════════════════════════════════════════════ + RESPONSIVE +═══════════════════════════════════════════════ */ + +@media (max-width: 768px) { + .md-typeset .hero { + padding: 1.8rem 1.3rem; + } + .md-typeset .hero h1 { + font-size: 1.6rem; + } + .md-typeset .card-grid { + grid-template-columns: 1fr; + } +} diff --git a/documentation/wiki/assets/fonts/Inter.woff2 b/documentation/wiki/assets/fonts/Inter.woff2 new file mode 100644 index 000000000..d15208de0 Binary files /dev/null and b/documentation/wiki/assets/fonts/Inter.woff2 differ diff --git a/documentation/wiki/assets/fonts/JetBrainsMono.woff2 b/documentation/wiki/assets/fonts/JetBrainsMono.woff2 new file mode 100644 index 000000000..cd5102a44 Binary files /dev/null and 
b/documentation/wiki/assets/fonts/JetBrainsMono.woff2 differ diff --git a/documentation/wiki/developer-guide/how-to/add-gui-parameters.md b/documentation/wiki/developer-guide/how-to/add-gui-parameters.md new file mode 100644 index 000000000..069921dd7 --- /dev/null +++ b/documentation/wiki/developer-guide/how-to/add-gui-parameters.md @@ -0,0 +1,7 @@ +# GUI Parameters + +Managers (typically, the `Model`) can define some variables that are regarded as simulation parameters, in addition to those defined in the input database. These are control parameters that can be changed from the **GUI** before the simulation starts or while the simulation is running in order to experiment with the model behaviour in interactive mode. + +To this end, all the properties of the manager class that are marked with the `@GUIparameter` annotation (which replaces the deprecated `@ModelParameter` annotation) are regarded as model parameters. Accordingly, the **JAS-mine engine** generates a window for each defined manager, with the list of variables defined as parameters, so that the user can change them via the GUI before constructing and running the model. These parameters are added to the ones defined in the input database tables, for example to define specific simulation scenarios on the fly, in interactive mode. + +The persistence system integrated in JAS-mine records these values in a specific table of the output database, together with the date and time of execution of the simulation run. \ No newline at end of file diff --git a/documentation/wiki/developer-guide/how-to/index.md b/documentation/wiki/developer-guide/how-to/index.md new file mode 100644 index 000000000..15a939eb6 --- /dev/null +++ b/documentation/wiki/developer-guide/how-to/index.md @@ -0,0 +1,9 @@ +# How-To Guides + +Practical recipes for common development tasks in SimPaths. 
+ +## Guides + +- [Introduce a New Variable](new-variable.md) — add a new simulated variable end-to-end +- [Add Parameters to the GUI](add-gui-parameters.md) — expose parameters in the JAS-mine GUI +- [Perform MultiRun Simulations](multirun-simulations.md) — run batch simulations diff --git a/documentation/wiki/developer-guide/how-to/multirun-simulations.md b/documentation/wiki/developer-guide/how-to/multirun-simulations.md new file mode 100644 index 000000000..b477eec43 --- /dev/null +++ b/documentation/wiki/developer-guide/how-to/multirun-simulations.md @@ -0,0 +1,59 @@ +# How to Perform MultiRun Simulations + +# 1. The MultiRun feature of JAS-mine + +There are many reasons why we may want to run and re-run our simulation many times. This may be to understand how the output of the model changes as a response to changes in the parameters of the model allowing for detailed design of experiments (DOE), parameter search / sensitivity analysis or optimization. It may also be because we want to understand the (Monte-Carlo) error or uncertainty in the output statistics of our model. In the first case, experimental design usually involves spanning over the values of the parameters, keeping the random number seed fixed. In the latter, it involves running the simulation a given number of times, without changing the values of the parameters (but changing the random number seed). + +JAS-mine provides the '[MultiRun](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/the-multirun-class/)' functionality to enable users to deal with these cases. + +Note that this functionality involves running a simulation many times in sequence on a single computer, as opposed to on parallel nodes. Future development work is intended to enable users of JAS-mine to easily set their simulations to run on parallel architecture, and tutorials will be added to describe this feature when it is ready. 
+The MultiRun functionality of JAS-mine is created by a wrapper class that wraps around the model, thus removing the need to change any of the internal workings of the model itself. A MultiRun template class is included in the new JAS-mine project created using the [JAS-mine Plugin for Eclipse IDE](https://marketplace.eclipse.org/content/jas-mine-plugin-eclipse-ide), so that users can see the general structure of a MultiRun class: + +![JAS-mine MultiRun template](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-multirun.png) + +# 2. The components of the MultiRun concrete class + +The abstract `MultiRun` class exists in the JAS-mine-core libraries in the `microsim.engine` package. In order to implement a multi run simulation, the user must create a concrete class that extends the abstract `MultiRun` class. An example of the concrete MultiRun template class (called '<*Project Name*>MultiRun.java') can be found in the experiment package of a new JAS-mine project, and the class extends the abstract 'MultiRun' class from the JAS-mine-core libraries. The abstract methods `nextModel()` and `setupRunLabel()` must be overridden by the concrete template class. + +The `nextModel()` method should return a boolean which determines whether another new simulation should be launched. In the template example, the simulation is repeated *n* times for each of a specified population size of agents, but the boolean could instead depend, for instance, on whether a loop through a more complicated set of model parameters has terminated, signalling the completion of a parameter search experiment. 
+The `setupRunLabel()` method provides a unique MULTI_RUN_ID name for each simulation run, which could signify a parameter in the model such as the country represented in the simulation for example; it is stored in the output database in the JAS_EXPERIMENT output table, as in the screenshot below: + +![JAS-mine MultiRun ID](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-multirun-id.png) + +The concrete `MultiRun` class must include a `main(String[] args)` function in which to launch the MultiRun version of the simulation. As this class is used to launch the simulations instead of the Start class, the concrete MultiRun class also needs to include a `buildExperiment()` method to replace the one used in the Start class. Note that when running multiple runs, it is often the case that the user will want to optimise speed of execution, so it is recommended not to invoke the Observer class in the `buildExperiment()`, nor enable the normal MicrosimShell gui that is normally initiated in the `main()` method of the Start class. Therefore in the template only the model and collector are constructed in the `buildExperiment()` method. And instead of the MicrosimShell gui, a useful progress monitor gui can be used within Eclipse, and is initiated with the `MultiRunFrame` invocation, as seen in the `main()` method of the MultiRun template class. In this example, the MultiRunFrame can be toggled using the '*executeWithGUI*' boolean field. See below for a screenshot of the MultiRunFrame: + +![JAS-mine MultiRun Frame](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-multirun-frame.png) + + +# 3. Executing the MultiRun simulation mode + +## 3.1 Within an IDE + +Running a MultiRun version of a JAS-mine project in an IDE is very easy. Here we refer to Eclipse IDE. Just right-click on the concrete class (named '<*Project Name*>MultiRun.java' if using the JAS-mine plugin for Eclipse IDE), and select 'Run As / Java Application' menu. 
The MultiRunFrame (as shown in the screenshot above) should pop up, and the simulation sequence can be started by clicking the 'Start' button. + +The progress bar of the MultiRunFrame will oscillate between empty and full colour, showing that progress is being made behind the scenes. The 'Current run number' represents the id of the current run, which counts the total number of simulations that have so far completed plus one. The 'Current run step' indicates the simulation run's internal time clock, i.e. a run step of 10.52 indicates that the events currently firing in the simulation were scheduled to occur at 10.52 time units. The 'Current step' is the run label, which is stored in the MULTI_RUN_ID of the JAS_EXPERIMENT table, and is the unique id of this particular simulation. As discussed in section 2 above, this could represent the combination of a model parameter such as the country that the run represents, and an index label, as in the case of the screenshots of the database and MultiRunFrame above. + +Once the MultiRun simulation is complete (i.e. the `nextModel()` method returns the false boolean), the MultiRunFrame will disappear and the MultiRun is over. + +## 3.2 Batch mode + +An alternative to executing the MultiRun functionality of a JAS-mine project from within an IDE is to use the batch environment (e.g. the command prompt in Windows, or the terminal in Linux). Indeed it may be necessary to use batch mode to launch JAS-mine simulations when running on a high performance computing facility. Users may also find that it is faster to execute their simulations in this way on their personal computers. 
+When using Batch mode, users should disable the MultiRunFrame (in the template MultiRun class of the new JAS-mine project, this can be achieved by setting '*executeWithGUI*' boolean to false, see the next paragraph) and they should ensure that the Observer class is not invoked in the `buildExperiment()` method of the concrete MultiRun class; these are unnecessary in Batch mode and could slow the simulations down. + +# 4. Setting the program arguments + +The number of times to run a simulation and use of the MultiRunFrame can be determined by passing program arguments to the run environment when launching the application (which are then used as the `args` parameter of the `main(String[] args)` method). This is done either: + +i) From the command prompt (in Windows) or terminal (in Linux) when launching the compiled classes with the command: +``` +java TestMultiRun -n 1000 -g false +``` + +where the `-n` flag sets the number of runs to 1000, and the `-g` flag sets the `executeWithGUI` field to false (which disables the MultiRunFrame). + +ii) Alternatively, the program arguments can be set in the IDE by clicking on the 'Run / Run Configurations' menu and setting the values in the 'Program arguments' window under the Arguments tab for the MultiRun application, as in the screenshot below: + +![JAS-mine program arguments](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-program-arguments.png) \ No newline at end of file diff --git a/documentation/wiki/developer-guide/how-to/new-variable.md b/documentation/wiki/developer-guide/how-to/new-variable.md new file mode 100644 index 000000000..b751b83a8 --- /dev/null +++ b/documentation/wiki/developer-guide/how-to/new-variable.md @@ -0,0 +1,79 @@ +# How to Introduce a New Variable + +# 1. An application using "Ethnicity" + +This page explains how to add a new variable to the SimPaths model. A similar approach can be used to add other variables. 
This example concerns the insertion of an additional variable, "Ethnicity", named `dot01`, defined in six categories, and whose issue is detailed [here](https://github.com/centreformicrosimulation/SimPaths/issues/212). + +# 2. Update the input data + +Before starting, the user must ensure that they have the updated version of the data for the model, which shall include the variable(s) to be added to the code. If not, they should get the [UKHLS](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=6914) (Understanding Society) and [WAS](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=7215) (Wealth and Assets Survey) data from the [UK Data Service](https://ukdataservice.ac.uk/) (DS) and generate the input data for the model using these datasets and the [do files from GitHub](https://github.com/centreformicrosimulation/SimPaths/tree/main/input/InitialPopulations/compile). + + +# 3. Load the `dot01` variable in SimPaths + +## 3.1 Define a 6-category Ethnicity enum +In the enums folder, create an [enum class](https://www.microsimulation.ac.uk/jas-mine/resources/key-java-concepts/enums/) for the variable, as illustrated in the image below: + +![image](https://www.dropbox.com/scl/fi/80yt50stokc57jgext4hl/Capture-d-cran-2025-07-21-11.47.05.png?rlkey=9boedmt4vezy5k1s2x7t2ue55&st=la9nsni0&raw=1) + +## 3.2 Add the `dot01` variable to the list of individual characteristics +In the Parameters.java file, there are string arrays that define which variables are loaded from the donor populations and the initial populations files. As `dot01` is available only in the initial population files, it has to be added to the person variable initial array (`PERSON_VARIABLES_INITIAL`). See image below: + +![image](https://www.dropbox.com/scl/fi/zes4qd1nq4qnk35k8lsrl/Capture-d-cran-2025-07-21-12.14.35.png?rlkey=mlxi0cq5bfm94uxpwgxs0n9dt&st=6pr9ss1c&raw=1) + + +# 4. 
Modify the SQL Tables + +At this point, the code that generates the SQL tables needs to be modified to make sure that these tables include the new categorical variable `dot01`. To this end, the DataParser.java file in the startingpop folder must be updated. In particular, the new chunk of code has to be added to the `parse()` method (see image below): + +![image](https://www.dropbox.com/scl/fi/sqyb6648vd6irbcnydi9o/Capture-d-cran-2025-07-21-14.30.50.png?rlkey=98wkyfkqbrkgrlyimpqyamjh0&st=c60k8v9t&raw=1) + +When the method is expanded, in the block `try{}`, there are the SQL commands to insert the various persons' characteristics in the tables. Here, the lines to insert the ethnicity should be added, paying attention to use the same categorical specification given in the Ethnicity enum class (see image below): + +![image](https://www.dropbox.com/scl/fi/skct50a3g348jkfowpgix/Capture-d-cran-2025-07-21-15.07.16.png?rlkey=chlxw1vvlx0bod5ppiussfrtw&st=zzctbn8b&raw=1) + + +# 5. Define the Ethnicity variable in the Person class + +The Person class is one of the core parts of the model, as it is the blueprint of individuals in SimPaths. Here, the new variable should be `@Enumerated()`, which is a Java annotation used on enum fields in classes to tell the persistence provider (like _Hibernate_ or _Jakarta_) how to save the enum into a database column. In other words, it allows to map the Java object/instance into the SQL table. +Practically, the code will look like in the image below: + +![image](https://www.dropbox.com/scl/fi/pkl98w4frp91t72416o9l/Capture-d-cran-2025-07-21-17.11.29.png?rlkey=wa7k5h19rvwk9zgqighcdvec5&st=8ng1yqg6&raw=1) + +Now that the variable has been added to the Person class, it should be provided with a getter/setter. 
To do so, it is sufficient to right-click on the variable name `dot01`, then "Generate" > "Getter"/"Setter" (see image below): + +![image](https://www.dropbox.com/scl/fi/uw70ocxzz2w1v0mw0muoj/Capture-d-cran-2025-07-21-17.45.48.png?rlkey=1hib889nubsen1jl7uc7cdeb7&st=2vaspz49&raw=1) + +Once added, the Getter and Setter should look as in the image below, and they should be moved to the end of the file together with those of other variables: + +![image](../../jasmine-reference/collection-filters.md) + +After the variable is inserted with getters and setters, the [constructors](https://www.digitalocean.com/community/tutorials/constructor-in-java#constructor-overloading-in-java) in the Person class must be updated to include this new variable. For Ethnicity, there are two constructors at play. The first one, `public Person (Person originalPerson, long seed, SampleEntry sampleEntry) {...}`, is the one that is used to clone the person. The second one, `public Person(Gender gender, Person mother) {...}`, is the one for the newborn, which takes as arguments the gender and the mother and "creates" a child. As ethnicity is assumed to be inherited from the mother, it will be sufficient to add a line `dot01 = mother.getDot01();`, where it is stated so (_i.e._, the person's ethnicity - `dot01` - is equal to the person's mother's one `mother.getDot01()`). + + + +# 6. Update the Regressors List class + +The regressions' estimates for education, fertility, health, etc. stored in /SimPaths/input (originally obtained by running the do files in /SimPaths/input/InitialPopulations/compile/RegressionEstimates) indicate what covariates enter each regression. As Ethnicity is now part of the initial populations' variables, it can also be used for one or more of these regressions.
Therefore, similarly to all the other covariates, it must be listed in the `public enum DoublesVariables {}` list (in the Section _implements IDoubleSource for use with Regression classes_) in the Person.java file.[1](#footnote-1) +First, the user should make sure that the variable is coded exactly as it is inserted in the regressions. For example, in our case, Ethnicity enters the regressions in the form of four dummy variables (the first of which is excluded as the residual category): +`Ethn_White,` (=1 if the individual is White; =0 otherwise) +`Ethn_Asian,` (=1 if the individual is Asian; =0 otherwise) +`Ethn_Black,` (=1 if the individual is Black; =0 otherwise) +`Ethn_Other,` (=1 in all the other cases; =0 otherwise) +Thus, this list should be coded identically under the `public enum DoublesVariables {}` list in the Person class. +Secondly, the user has to "populate" the values of these variables, following the specification defined in the Ethnicity enum created earlier. Below the `public enum DoublesVariables {}` list in the Person class, there is a method defined as follows, where this procedure is carried out for all the variables in the `DoublesVariables {}` list. + +```java + public double getDoubleValue(Enum variableID) { + switch ((DoublesVariables) variableID) {...} } +``` +In particular, the functioning of the four binary variables defined above should be elaborated within the `switch` control list, as illustrated below: + +![image](https://www.dropbox.com/scl/fi/8dtyshltvfjodhx8931xl/Capture-d-cran-2025-07-23-14.24.29.png?rlkey=nrp5xf0slnvovl09xpy9v9ry3&raw=1) + + +# 7. Conclusions + +After having completed all these steps, the variable will be available in SimPaths. It is the user's responsibility to run the model until the final simulation step to make sure that all the changes have been implemented correctly and that the model functions with the new variable.
+ +[1] When a regression is run on benefit units (rather than persons), the corresponding list is `public enum Regressors {}` in the BenefitUnit.java file. diff --git a/documentation/wiki/developer-guide/index.md b/documentation/wiki/developer-guide/index.md new file mode 100644 index 000000000..6a1320a3d --- /dev/null +++ b/documentation/wiki/developer-guide/index.md @@ -0,0 +1,126 @@ +# Developer Guide + +SimPaths is a Java project based on the JAS-mine simulation libraries. + +![JAS-mine logo](https://www.microsimulation.ac.uk/wp-content/uploads/2026/01/LOGO_NEW_TEXT.png) + +JAS-mine extends Java functionalities and provides an architectural template for dynamic microsimulation and agent-based models, aimed at improving the clarity and transparency of the model structure. + +New developers of SimPaths are strongly recommended to familiarise themselves with the JAS-mine architecture, and in particular: +* [JAS-mine GitHub repository](https://github.com/jasmineRepo) +* [JAS-mine core API](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/index.html) +* [JAS-mine GUI API](https://raw.githack.com/jasmineRepo/JAS-mine-gui/master/microsim-gui/doc/index.html) +* [JAS-mine documentation](https://www.microsimulation.ac.uk/jas-mine/) +* [JAS-mine reference paper](http://microsimulation.org/IJM/V10_1/IJM_2017_10_1_4.pdf) + +*** + +## 1. Guiding principles +
+ Clarity +A clear distinction is made in JAS-mine between objects with a modelling content, which specify the structure of the simulation, and objects which perform useful but auxiliary tasks, from enumerating categorical variables to building graphical widgets, from creating filters for the collection of agents to computing aggregate statistics to be saved in the output database. + +JAS-mine extends the _Model-Observer_ paradigm introduced by the [Swarm](http://www.swarm.org/wiki/Main_Page) experience and introduces a new layer in simulation modelling, the _Collector_. + +* The **Model** deals mainly with specification issues, creating objects, relations between objects, and defining the order of events that take place in the simulation. +* The **Collector** collects the data and computes the statistics both for use by the simulation objects and for post-mortem analysis of the model outcome, after the simulation has completed. +* The **Observer** allows the user to inspect the simulation in real time and monitor some pre-defined outcome variables as the simulation unfolds. +This three-layer methodological protocol allows for extensive re-use of code and facilitates model building, debugging and communication. + +Moreover, JAS-mine envisages **strict separation between the code and the data**, with all parameters and input tables stored either in an input database or in specific MS Excel files. The regression package provides tools for simulating outcomes from standard regression models (OLS, probit/logit, multinomial, ordered and generalised ordered models): in particular, there is no need to specify the variables that enter a regression model, as they are directly read from the data files. This greatly facilitates exploration of the parameter space, testing different econometric specifications, and scenario analysis. +
+ +
+ Transparency + +Transparent coding for transparent modelling is achieved by + +1. enforcing a strict adherence to the **open source** paradigm, which makes it less of a black-box with respect to proprietary software and encourages cooperative development of the platform by the community of users: all functions can be inspected and, if necessary, modified or extended. + +2. allowing the user to choose from a wide range of classes and interfaces which **extend the standard Java language**, rather than providing an ad-hoc grammar and syntax. The JAS-mine libraries therefore provide open tools to “manufacture” a simulation model, making use whenever possible of solutions already available in the software development community (external functions can also be easily added as plug-ins). This also ensures a maximum amount of flexibility in model building. +
+ +*** + +## 2. Architecture +SimPaths shares with all JAS-mine projects some architectural choices. + +
+ Model-Collector-Observer + +The [Swarm protocol](https://www.swarm.org/wiki/Swarm_main_page) for agent-based platforms architecture recommends splitting the simulation into an internal *Model* and an external *Observer*. These two aspects of the artificial world should remain markedly separate. + +The purpose of the *Observer* is to inspect the model's objects. Through the *Observer* the state of simulation can be monitored and graphically represented in real time, while the simulation is running. However, for the purpose of analysis and validation, the *Observer* alone may not be adequate, because it implies the need to define in advance the aggregations on which to analyze the simulation outcome. A variation in perspective requires re-running the experiment. + +According to a different approach, the simulation is aimed exclusively at producing numerical outputs which can be analyzed in depth ex-post using ad-hoc statistical-econometric tools. + +JAS-mine combines these two different approaches extending the *Model-Observer* paradigm so as to include an intermediate structure that calculates statistical values and persists simulation modelling outputs in the database in the most transparent way, minimizing the impact on model implementation. In the JAS-mine architecture agents are organized and managed by components called managers. There are three types of managers: *Model*, *Collector* and *Observer*. + +* The **_Model_** deals mainly with specification issues, creating objects, relations between objects, and defining the order of events that take place in the simulation. +* The **_Collector_** collects the data and computes the statistics both for use by the simulation objects and for post-mortem analysis of the model outcome, after the simulation has completed. +* The **_Observer_** allows the user to inspect the simulation in real time and monitor some pre-defined outcome variables as the simulation unfolds. 
+ +This three-layer methodological protocol allows for extensive re-use of code and facilitates model-building, debugging and communication. + +JAS-mine allows multiple *Models* (and multiple *Collectors* and *Observers*) to run simultaneously, since they share the same scheduler (known as a singleton). This allows for the creation of complex structures where agents of different *Models* can interact. Each *Model* is implemented in a separate Java class that creates the objects and plans the schedule of events for that *Model*. *Model* classes require the implementation of the `SimulationManager` interface, which implies the specification of a `buildObjects()` method to build objects and agents, and a `buildSchedule()` method for planning the simulation events. Analogously, *Collector* classes must implement the `CollectorManager` interface, and *Observer* classes must implement the `ObserverManager` interface. +
+ + +
+ The JAS-mine engine + +The core of the JAS-mine toolkit is represented by the simulation engine. It is based on the standard discrete-event simulation paradigm, which allows time to be managed with high flexibility and from a multi-scale perspective. + +The JAS-mine engine is based on the scheduler, which handles all the events in the simulation. The scheduler is a “singleton” (in software engineering, the singleton pattern is a design pattern that restricts the instantiation of a class to one object), which means that all the agents in the simulation share the same scheduler. Events can be scheduled in advance (for instance once every simulation period) or dynamically, by the agents themselves (for instance, job termination is scheduled upon hiring). This allows both continuous-time and discrete-time simulations to be implemented. + +
+🔴 FOCUS: Time in simulation + +The abstract representation of a continuous phenomenon in a simulation model requires that all events be presented in discrete terms. + +With some confusion in the notation, **discrete-event** computer simulations can be cast either in **discrete time** or in **continuous time**. + +With **discrete time**, time is broken into regular (equi-spaced) time slices (∆t) and the simulator calculates the variation of state variables for all the elements of the simulated model between one point in time and the next. Nothing is known about the order of the events that happen within each time period: discrete events (marriage, job loss, etc.) could have happened at any moment in ∆t while inherently continuous events (ageing, wealth accumulation, etc.) are best thought to progress linearly between one point in time and the next. + +By contrast, simulations cast in **continuous time** are characterized by irregular timeframes that are punctuated by the occurrence of discrete events. Between consecutive events, no change in the system is assumed to occur; thus the simulation can directly jump in time from one event to the next. Inherently continuous events must be discretized. + +The event list orders the events and the simulation is performed by extracting the event that is closest in time and submitting it to the model's agents, which change their state according to the signal (corresponding to the event) they have received. In the case of continuous simulations, the order of the processes that are applied must be exogenously assumed (and the assumption must be coherent with the specification of the model used for estimating the coefficients governing each process). The events may also be generated and scheduled not only in the initial planning phase but also while running the simulation. +
+ +
+ +
+ Input-Output communication + +Data management is a major factor to be weighed in for the creation of a simulation tool. Building on the vast number of software solutions available, JAS-mine allows the user to separate data representation and management from the implementation of processes and behavioral algorithms. + +One distinguishing feature of the platform lies in the integration with relational database management systems (RDBMS) through ad-hoc Java libraries. The management of input data persistence layers and simulation results in JAS-mine is performed using standard database management tools, and the platform takes care of the automatic translation of the relational model of the database into the object-oriented simulation framework thanks to an ORM layer. + +
+🔴 FOCUS: Object-Relational Mapping (ORM) + +The software paradigm that is best suited to represent and manipulate population data is object-oriented programming (OOP). On the other hand, input and output data (especially in complex projects) are best stored in a relational database. Unfortunately, database relational modelling is less intuitive than OOP and requires a specific language (SQL) to retrieve and modify data. + +In JAS-mine the interaction between the simulation and the (input and output) data is inspired by Object-Relational Mapping (ORM), a programming approach that facilitates the integration of object-oriented software systems with relational databases. An ORM product (JAS-mine uses [Hibernate](http://hibernate.org/orm/)) constructs an object-oriented interface to provide services on data persistence, while abstracting at the same time from the implementation characteristics of the specific RDBMS (database management software) used. The management of input data persistence layers and simulation results is performed using standard database management tools, and the platform takes care of the automatic translation of the relational model (which is typical of a database) into the object-oriented simulation model, where each category of individuals or objects that populate the model is represented by a specific class, with its own properties and methods. + +![Hibernate Position](http://www.tutorialspoint.com/images/hibernate_position.jpg) + +The main advantages of using an ORM system are: + +1. the masking of the implementation of the relational model in an object-oriented model; +2. high portability compared to the DBMS technology adopted: no need to rewrite data input queries on database when changing DBMS, simply modify a few lines in the configuration of the ORM used; +3. a drastic reduction in the amount of code to be written; the ORM masks the complex activities of data creation, extraction, update and deletion behind simple commands. 
These activities take up a considerable proportion of the time required for writing, testing and maintenance. Moreover they are inherently repetitive, thus increasing the chance of errors when writing the implementation code. + +The most common ORM products available today offer a number of functions that would otherwise be performed manually by the programmer; in particular, the operations of loading the object graph based on association links defined at language level, and reading/writing/deleting are entirely automated. For instance, loading an instance of the `Student` class may result in the automatic loading of data concerning the student's exam grades. + +The use of an ORM facilitates the achievement of higher quality software standards, in particular improving its correctness, maintainability, potential evolutions and portability. On the down side, choosing an ORM paradigm introduces a software layer that impacts on performance, an aspect that is relevant to data-intensive applications like simulations. Translating the entity-relational model that is typical of a database into an object-based model requires additional activities that may slow down data upload and reading. Given the continuous increases in the speed and power of modern computers, we opted for a lean architectural structure even at the cost of slowing down the simulation engine. +
+ +This also allows data creation to be separated from data analysis, which is crucial for understanding the behaviour of the simulation model. As the statistical analysis of the model output is possibly intensive in computing time, performing it in real time might be an issue in large-scale applications. A common solution is to limit real-time monitoring of simulation outcomes to a selected subset of output variables. This however requires identifying the output of interest before the simulation is run. If additional computations are required to better understand how the model behaves, the model has to be run again: the bigger the model, the more impractical this solution is. + +On the other hand, the power of modern RDBMS makes it feasible to keep track of a much larger set of variables, for later analysis. Also, the statistical techniques envisaged, and the specific modeler’s skills, might suggest the use of external software solutions, without the need to integrate them in the simulation machine. + +Finally, keeping data analysis conceptually distinct from data production further enhances the brevity, transparency and clarity of the code. +
+ + diff --git a/documentation/wiki/developer-guide/internals/api.md b/documentation/wiki/developer-guide/internals/api.md new file mode 100644 index 000000000..7ec3d830a --- /dev/null +++ b/documentation/wiki/developer-guide/internals/api.md @@ -0,0 +1,81 @@ +# SimPaths API + +SimPaths APIs are published [here](https://centreformicrosimulation.github.io/SimPaths/javadoc/). + +# 1. Introduction + +The SimPaths API documentation is generated using [Maven's Javadoc Plugin](https://maven.apache.org/plugins/maven-javadoc-plugin/). + +Javadoc is a Java tool that automatically generates HTML documentation from [Javadoc comments](https://www.oracle.com/uk/technical-resources/articles/java/javadoc-tool.html) embedded in the source code. + +The documentation website is updated automatically whenever a commit is pushed to the `develop` branch of SimPaths. This process is handled via GitHub Actions using a [Javadoc-publisher workflow developed by MathieuSoysal](https://github.com/MathieuSoysal/Javadoc-publisher.yml). The workflow file is available [here](https://github.com/centreformicrosimulation/SimPaths/blob/develop/.github/workflows/publish-javadoc.yml). + +To update the API documentation, add or modify Javadoc comments in the source code following [this guide](https://www.oracle.com/uk/technical-resources/articles/java/javadoc-tool.html), then push your changes to the `develop` branch. + +# 2. Workflow Details + +[The workflow](https://github.com/centreformicrosimulation/SimPaths/blob/develop/.github/workflows/publish-javadoc.yml) automates the generation and publishing of HTML documentation from Javadoc comments whenever changes are pushed to the `develop` branch. + +``` +on: + push: + branches: + - develop # Only publish when pushing to develop branch +``` + +The code is checked out from the `develop` branch, Java 19 is installed, and SimPaths is compiled. 
+ + +``` +jobs: + publish-javadoc: + runs-on: ubuntu-latest + permissions: + contents: write # Needed to push to the javadoc branch + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up JDK 19 + uses: actions/setup-java@v3 + with: + java-version: '19' + distribution: 'temurin' + cache: maven + + - name: Build (optional if Javadoc needs compiled sources) + run: mvn -B compile --file pom.xml +``` + +The documentation is then generated from the Javadoc comments in the code. + + +``` +- name: Generate Javadoc + run: mvn javadoc:javadoc --file pom.xml +``` + +Finally, the generated documentation is deployed to the `javadoc` branch of the SimPaths repository. + + +``` +- name: Deploy Javadoc to branch + uses: MathieuSoysal/Javadoc-publisher.yml@v3.0.2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + javadoc-branch: javadoc + java-version: 19 + target-folder: javadoc # Specifies the folder in which the documentation is saved + project: maven +``` + +The published documentation is hosted using [GitHub Pages](https://docs.github.com/en/pages/getting-started-with-github-pages/configuring-a-publishing-source-for-your-github-pages-site). GitHub Pages is a service that hosts static websites directly from a GitHub repository. + +GitHub pages is combined with the GitHub Actions workflow to ensure that the documentation is always up to date without the need for manual deployment: + +- The `javadoc` branch contains the generated HTML API documentation. +- GitHub Pages is configured to use this branch as the site’s content source. +- Each time the GitHub Actions workflow updates the `javadoc` branch, GitHub Pages automatically refreshes the live site. 
+ + diff --git a/documentation/wiki/developer-guide/internals/file-organisation.md b/documentation/wiki/developer-guide/internals/file-organisation.md new file mode 100644 index 000000000..742a1ef3a --- /dev/null +++ b/documentation/wiki/developer-guide/internals/file-organisation.md @@ -0,0 +1,5 @@ +# File Organisation + +!!! warning "In progress" + This page is under development. Contributions welcome — + see the [Developer Guide](../index.md) for how to contribute. diff --git a/documentation/wiki/developer-guide/internals/index.md b/documentation/wiki/developer-guide/internals/index.md new file mode 100644 index 000000000..25aa30320 --- /dev/null +++ b/documentation/wiki/developer-guide/internals/index.md @@ -0,0 +1,11 @@ +# SimPaths Internals + +This section documents the internal structure of the SimPaths codebase — how it is organised, its key classes, and how JAS-mine architecture is implemented. + +## Sections + +- [SimPaths API](api.md) — public API reference +- [File Organisation](file-organisation.md) — directory and package layout +- [The SimPathsModel Class](simpaths-model.md) — the central model class +- [Start Class Implementation](start-class-implementation.md) — SimPaths-specific start class +- [MultiRun Implementation](multirun-implementation.md) — SimPaths-specific MultiRun class diff --git a/documentation/wiki/developer-guide/internals/multirun-implementation.md b/documentation/wiki/developer-guide/internals/multirun-implementation.md new file mode 100644 index 000000000..ab2f03dfd --- /dev/null +++ b/documentation/wiki/developer-guide/internals/multirun-implementation.md @@ -0,0 +1,5 @@ +# MultiRun Implementation + +!!! warning "In progress" + This page is under development. Contributions welcome — + see the [Developer Guide](../index.md) for how to contribute. 
diff --git a/documentation/wiki/developer-guide/internals/schemes.md b/documentation/wiki/developer-guide/internals/schemes.md new file mode 100644 index 000000000..d0aef389e --- /dev/null +++ b/documentation/wiki/developer-guide/internals/schemes.md @@ -0,0 +1,14 @@ +# SimPaths Schemes + +![SimPaths structure_Page_1](https://github.com/user-attachments/assets/3c686541-6e63-48fc-bc84-8b98a422b1fc) +![SimPaths structure_Page_2](https://github.com/user-attachments/assets/a298fa9c-7dbc-4379-8ce8-8e630aba1a28) +![SimPaths structure_Page_3](https://github.com/user-attachments/assets/e2e8d64f-94c4-4436-a155-e0a91e73b694) +![SimPaths structure_Page_4](https://github.com/user-attachments/assets/737b8961-ab67-4b16-8c4b-3171527c3f19) +![SimPaths structure_Page_5](https://github.com/user-attachments/assets/fbdabdb0-bf53-4691-a162-52ece12b5b4e) +![SimPaths structure_Page_6](https://github.com/user-attachments/assets/4b9597c3-a2aa-465f-82e2-bcadc80ba7a4) +![SimPaths structure_Page_7](https://github.com/user-attachments/assets/6b83c6c2-0f84-4edc-97f8-c10fd77028bd) + +**Below another graphical representation:** +![Data Scheme_Page_1](https://github.com/user-attachments/assets/f8ecc79e-6c54-4488-8ff5-ffba6ee91edc) +![Data Scheme_Page_2](https://github.com/user-attachments/assets/2f13110d-a875-42a8-8442-65ebdc0e6d51) +![Data Scheme_Page_3](https://github.com/user-attachments/assets/5e5cfa78-0984-4577-84b6-205a69f2e02a) diff --git a/documentation/wiki/developer-guide/internals/simpaths-model.md b/documentation/wiki/developer-guide/internals/simpaths-model.md new file mode 100644 index 000000000..17335d7a1 --- /dev/null +++ b/documentation/wiki/developer-guide/internals/simpaths-model.md @@ -0,0 +1,80 @@ +# The SimPathsModel Class + +``_Under Construction_`` + +This page complements [4.05 - The Model and the Schedule] by explaining how SimPaths instantiates the generic JAS-mine scheduling framework in practice. 
It documents current SimPaths practice, not theory or design justification. It is not a complete specification of the model's behaviour, and should be read together with the codebase and validation documentation. + +## 1. The SimPaths model manager + +The core simulation logic in SimPaths is implemented in a model manager class that extends _AbstractSimulationManager_ and implements EventListener. + +The SimPaths model class functions primarily as a coordinator of simulation structure. Its primary responsibilities are to initialise the population and global data structures, define the temporal structure of the simulation via schedules, and respond to aggregate-level events. + +### **Model construction**: + +The **buildObjects()** method defines the initial simulation state before any time evolution takes place. +In SimPaths, this phase typically includes: +* Initialising random number generators, including the creation of separate random streams for different modules (e.g. matching, alignment, initialisation), to reduce unintended coupling between processes. +* Loading global parameters and projections for the simulation horizon. +* Preparing auxiliary infrastructures, such as indices for tax-benefit donor data. +* Creating the initial population by loading or constructing Person, BenefitUnit, and Household objects, including any required population expansion or preprocessing. +* Initialising internal state variables (e.g. the simulation year counter). + +### Simulation execution schedule + +The temporal structure of the simulation is defined in the **buildSchedule()** method. This method specifies the sequence of events that govern the evolution of the simulated population and determines how processes are executed over time. All model dynamics are implemented as events managed by the JAS-mine discrete-event simulation engine. 
+ +``` java +@Override +public void buildSchedule() { + + addEventToAllYears(Processes.StartYear); + + addEventToAllYears(Processes.UpdateParameters); + addEventToAllYears(Processes.GarbageCollection); + + addCollectionEventToAllYears(benefitUnits, BenefitUnit.Processes.Update); + addCollectionEventToAllYears(persons, Person.Processes.Update); + + // ... +} + +``` + +The schedule distinguishes between two execution regimes. A first schedule is applied to the initial simulation year, when many individual attributes are inherited directly from the input data and only a subset of behavioural processes must be evaluated. A second schedule governs all subsequent years and is repeated at regular annual intervals until the end of the simulation horizon. + +``` java +EventGroup firstYearSched = new EventGroup(); +EventGroup yearlySchedule = new EventGroup(); + +// ... + +getEngine().getEventQueue().scheduleOnce(firstYearSched, startYear, ordering); +getEngine().getEventQueue().scheduleRepeat(yearlySchedule, startYear+1, ordering, 1.); +``` + +Model processes are executed before data collection and observation components operating at the same simulation time. This ensures that monitoring and persistence routines operate on fully updated system states. The initial-year schedule runs once at the simulation start year, after which the standard yearly schedule repeats at fixed annual intervals. + +``` java +int orderEarlier = -1; + +getEngine().getEventQueue().scheduleOnce( + new SingleTargetEvent(this, Processes.CleanUp), endYear+1, orderEarlier +); + +SystemEvent end = new SystemEvent(SimulationEngine.getInstance(), SystemEventType.End); +getEngine().getEventQueue().scheduleOnce(end, endYear+1, orderEarlier); +``` + +In SimPaths, this mechanism is implemented through the JAS-mine EventListener interface. +The model defines an enumeration (Processes) that lists all model-level operations that can be triggered during simulation. 
When an event fires, the simulation engine delivers the corresponding enumeration value to the target object. The object’s onEvent() method interprets this identifier and invokes the associated process. + +Scheduling, process identification, and process implementation are therefore separated. The event schedule determines when processes occur, the enumeration defines which processes exist, and the event handler executes them. + +## 2. Dynamic event scheduling + +Events in SimPaths may be defined during model construction or generated while the simulation is running. Processes can schedule additional events through the simulation engine when future actions must occur conditionally or at non-regular times. + +All dynamically created events are inserted into the same global event list and are executed according to simulation time and event ordering rules. Once scheduled, dynamically generated events are treated identically to pre-scheduled events. This allows model evolution to depend on realised states rather than only on predetermined schedules. + + diff --git a/documentation/wiki/developer-guide/internals/start-class-implementation.md b/documentation/wiki/developer-guide/internals/start-class-implementation.md new file mode 100644 index 000000000..7375c052d --- /dev/null +++ b/documentation/wiki/developer-guide/internals/start-class-implementation.md @@ -0,0 +1,81 @@ +# Start Class Implementation + +The [SimPathsStart](https://github.com/centreformicrosimulation/SimPaths/blob/main/src/main/java/simpaths/experiment/SimPathsStart.java) class is the entry point for running the SimPaths microsimulation model. It provides essential functionalities for initializing the simulation environment and offers methods for configuring simulation parameters, database setup, and user interactions. + +# 1. Overview + +This class handles the following primary functionalities: + +1. 
**Displaying a GUI:** Users can define startup processes, such as selecting policies, modifying policies, or rebuilding the database, using the dialog box presented by this class. + +2. **Selecting simulation country and start year:** The class adjusts country and start year based on user's choice. + +3. **Starting the Simulation Engine:** It initializes the JAS-mine simulation engine, optionally creating and displaying a graphical user interface for the simulation. + +4. **Selecting and Starting an Experiment:** The `buildExperiment` method configures various components of the SimPaths model, including the model itself, a collector, and an observer. + +5. **Creating Database Tables:** The `createDatabaseTables` method facilitates the creation of initial and donor population database tables based on user choices. + + +# 2. Methods and Functionality + +
+The main method + +The `main` method serves as the entry point for running the SimPaths microsimulation model. It initializes simulation parameters, displays a GUI, and starts the JAS-mine simulation engine. + +
+ +
+The buildExperiment method + +This method is called by the JAS-mine simulation engine to configure the components of the SimPaths model, including the model itself, a collector, and an observer. + +
+ +
+The runGUIdialog method + +The `runGUIdialog` method allows users to define startup processes for the simulation through a dialog box. Options include running the GUI, selecting policies, modifying policies, and rebuilding the database. + +
+ +
+The createDatabaseTables method + +This method is responsible for creating database tables required for the simulation. Users can choose to create initial population tables, donor population tables, or both, thus setting up the necessary database environment. + +
+ +
+The chooseCountryAndStartYear method + +This method displays a GUI for selecting the country and starting year for the simulation. Users make choices via combo-boxes, and the selected values set the simulation's country and starting year. Additionally, the method saves these choices to an Excel file for future use. + +
+ +
+The constructAggregatePopulationCSVfile(Country country) method + +This method constructs a CSV file by aggregating data from multiple UKMOD/EUROMOD output text files for a specific country. It extracts relevant columns and creates a CSV file that serves as input data for the creation of donor database tables. + +
+ +
+The createInitialDatabaseTablesFromCSVfile(Country country) method + +This method builds initial population database tables from initial population CSV files. These tables represent the initial population for a specific country and starting year and are foundational for running simulations in the JAS-mine model. + +
+ +
+The populateDonorTaxUnitTables(Country country) method + +This method populates donor tax unit tables with data from UKMOD/EUROMOD. It gathers information on gross and net income, demographic characteristics and benefits and stores it in the database. The method calculates various attributes related to tax units and adds them to the database tables. + +
+ + +# 3. Usage +Compiling and running the `SimPathsStart` class launches the app. + diff --git a/documentation/wiki/developer-guide/jasmine/index.md b/documentation/wiki/developer-guide/jasmine/index.md new file mode 100644 index 000000000..1aa381a60 --- /dev/null +++ b/documentation/wiki/developer-guide/jasmine/index.md @@ -0,0 +1,17 @@ +# JAS-mine Architecture + +SimPaths is built on the [JAS-mine](https://www.microsimulation.ac.uk/jas-mine/) simulation libraries. Understanding the JAS-mine architecture is essential for developing or modifying SimPaths. + +New developers are recommended to start with the following external resources: + +- [JAS-mine GitHub repository](https://github.com/jasmineRepo) +- [JAS-mine core API](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/index.html) +- [JAS-mine documentation](https://www.microsimulation.ac.uk/jas-mine/) + +## Sections + +- [Project Structure](project-structure.md) — the structure of a JAS-mine project +- [The Model and the Schedule](model-and-schedule.md) — how events are scheduled +- [The Start Class](start-class.md) — the entry point of the simulation +- [The MultiRun Class](multirun-class.md) — batch simulation runs +- [Updating JAS-mine](updating-jasmine.md) — how to upgrade the JAS-mine dependency diff --git a/documentation/wiki/developer-guide/jasmine/model-and-schedule.md b/documentation/wiki/developer-guide/jasmine/model-and-schedule.md new file mode 100644 index 000000000..b17b3f3b6 --- /dev/null +++ b/documentation/wiki/developer-guide/jasmine/model-and-schedule.md @@ -0,0 +1,101 @@ +# The Model and the Schedule + +Note: This Section refers to a generic JAS-mine project + +# 1.The SimulationManager interface + +All JAS-mine Models should either implement the *SimulationManager* interface or extend the *AbstractSimulationManager* class. +Two methods are required in a Model class. 
+ +* The ***buildObjects*()** method should contain the instructions to create all the agents and the objects that represent the virtual environment for model execution. +* The ***buildSchedule*()** method should contain the planning of the events for the simulation. + +Collectors and Observers follow a similar logic, the only difference being that Collectors must implement the *CollectorManager* interface, and Observers must implement the *ObserverManager* interface. The Demo07 model describes those classes in detail. + +In the following example the first instruction loads a list of agents, instances of the Agent class, from an input database table using the JAS-mine integrated ORM system: + +```java +public void buildObjects() { + + agentsList = (List) DatabaseUtils.loadTable(Agent.class); +} +``` + +Events are planned based on a discrete event simulation paradigm. This means that events can be scheduled dynamically at specific points in time. The frequency of repetition of an event can be specified in case of recurring events characterized by a specific frequency. An event can be created for a specific recipient. In particular, an event can be created and managed by the simulation engine (a system event, e.g. simulation stops), it can be sent to all the components of a collection or list of agents or it can be sent to a specific object/instance. Events can be grouped together if they share the same schedule. + +```java +public void buildSchedule() { + + EventGroup s = new EventGroup(); + s.addCollectionEvent(agentsList, Agent.Processes.Age); + s.addEvent(this, Processes.MarriageMatching); + getEngine().getEventList().scheduleRepeat(s, 0.0, 0, 1.0); + getEngine().getEventList().scheduleOnce(new SingleTargetEvent(this, Processes.Stop), 2.0, Order.AFTER_ALL.getOrdering()); + +} +``` + +In the example above a group of events (labeled *s*) is created to be run for the first time at time 0.0 and then repeated at each interval of 1.0 time units. 
Within an event group, events are run sequentially, as specified in the code. The first event in the event group *s* (*Age*) is sent to all the individuals in the simulated population and entails aging. The second event is targeted to the model itself and entails running the *MarriageMatch*() method, which forms couples based on some matching algorithm. The event group *s* is added to the model's schedule in repeat mode using the *scheduleRepeat*() method. Finally, the end of the simulation is scheduled once using the *scheduleOnce*() method at time t=2.0 and is notified to the model itself. + +# 2. The Ordering of Events + +The signature of the *scheduleOnce*() method is: + +**scheduleOnce(Event event, double atTime, int withOrdering),** + +whilst the signature of the scheduleRepeat() method is: + +**scheduleRepeat(Event event, double atTime, int withOrdering, double timeBetweenEvents).** + +Note the use of the '*withOrdering*' integer field. This is used to specify the order in which events scheduled at the same time are fired; events with lower values of the *withOrdering* field will fire first. For example, if two events are scheduled to occur at time 10.52, then if event A was scheduled with the *withOrdering* field set to 0 whereas event B was scheduled with *withOrdering* set to -1, event B will fire before event A. If two events are scheduled with both the same *atTime* and *withOrdering*, the event that was added to the schedule earlier in the simulation will be fired first. It is therefore important that events scheduled for the same time share the same value of the *withOrdering* field only if the order in which they are fired does not matter. + +There are two standard ordering values: + +* Order.BEFORE_ALL.getOrdering() which should be reserved to schedule events that need to be fired before all other events scheduled at the same time. 
+* Order.AFTER_ALL.getOrdering() which should be reserved to schedule events that need to be fired after all other events scheduled at the same time. This ordering value is used in the example above, where it specifies that the Processes.Stop event only be fired after all events scheduled for the same time. Note that it might be desirable to record the data of the simulation just before it is stopped by the Processes.Stop event, which could be achieved by scheduling the collector to dump the persisted data to the database at the same time as the Processes.Stop event, but with the *withOrdering* field of the event set as Order.AFTER_ALL.getOrdering() **– 1**, so that it is fired just before the Processes.Stop event. + +# 3. The EventListener interface + +A class can receive and process events after implementing the *EventListener* interface and defining the onEvent method that will receive specific enumerations to be interpreted. + +In the example, the model defines an enum called *Processes* as follows: + +```java +public enum Processes { + + MarriageMatching, + Stop; + +} +``` + +The *onEvent*() method decodes this object and performs the required action: + +```java +public void onEvent(Enum type) { + + switch ((Processes) type) { + + case MarriageMatching: + […] + break; + case Stop: + getEngine().pause(); + break; + + } +} +``` + +Analogously, the Agent class also defines an enum called *Processes*, which in this example contains the *Age* case. + + +# 4. Dynamic Scheduling + +Note that events can be scheduled dynamically and need not be planned in advance when constructing the model. For instance, events can be added by the agents themselves, based on their behavioural rules. 
This simply requires accessing the event list through a singleton instance of the simulation engine, with the following instruction: + +```java +SimulationEngine.instance.getEventList(); +``` + + diff --git a/documentation/wiki/developer-guide/jasmine/multirun-class.md b/documentation/wiki/developer-guide/jasmine/multirun-class.md new file mode 100644 index 000000000..c3b56b43a --- /dev/null +++ b/documentation/wiki/developer-guide/jasmine/multirun-class.md @@ -0,0 +1,61 @@ +# The MultiRun Class + +In a JAS-mine project, the *MultiRun* class replaces the *Start* class when the user wants to repeatedly run simulations using different parameter values, so as to explore the space of solutions and produce sensitivity analyses on the specified parameters. For information on how to use the multi run functionality, see the tutorial. Like the *Start* class, the *MultiRun* class serves to initialize and run the JAS-mine simulation engine and to define the list of models to be used. Unlike the *Start* class, the user will not generally want to make use of the interactive mode and the JAS-mine GUI, as often time constraints require maximising the speed of the simulations in a multi run situation; using the Observer and JAS-mine GUI will only slow the simulation speed down, so these will not likely be used in a standard multi run set-up. The MultiRun class is therefore designed to handle the following: + +* performing a multi run of the simulation in **batch mode**, through the creation of the *Model* and possibly the *Collectors*; this involves managing parameter setup, model creation and execution directly, and is aimed at capturing only the simulation's numerical output. + +The abstract MultiRun class exists in the JAS-mine-core libraries in the microsim.engine package. In order to implement a multi run simulation, the user must create a concrete class that extends the abstract *MultiRun* class. 
An example of the concrete MultiRun template class (called '<*Project Name*>MultiRun.java') can be found in the experiment package of a new JAS-mine project, and the class extends the abstract *MultiRun* class from the JAS-mine-core libraries. Below, we create an example of a concrete *TestMultiRun*, which extends the abstract *MultiRun* class. As this class is used to launch simulations instead of the *Start* class, *TestMultiRun* must include a main(String[] args) function in which to launch the multi run version of the simulation. In the example below, to facilitate running in batch mode via the command line interface (such as the Windows Command Prompt or the Linux Terminal), we can use program arguments to set parameters via the String[] args. For more information see section 4 of the MultiRun tutorial. The abstract *MultiRun* class implements the *ExperimentBuilder* interface, so the *TestMultiRun* class must also define the *buildExperiment*() method. This method should create managers and add them to the JAS-mine engine. In the example below, a model called *TestModel* is created along with a collector called *TestCollector*, and the simulation is run in batch mode: + +```java +@Override +public void buildExperiment(SimulationEngine engine) { + + TestModel model = new TestModel(); + + engine.addSimulationManager(model); + + TestCollector collector = new TestCollector(model); + engine.addSimulationManager(collector); + +} +``` + +Note that when running multiple runs, it is often the case that the user will want to optimise speed of execution, so it is recommended not to invoke the Observer class in the *buildExperiment*(), nor enable the MicrosimShell gui that is normally used in interactive mode and initiated in the main() method of the Start class. Therefore in our *TestMultiRun* example, only the model and collector are constructed in the *buildExperiment*() method. 
Instead of the MicrosimShell gui, a useful progress monitor gui can be used within Eclipse, and is initiated with the *MultiRunFrame* invocation, as seen in the main() method of the *TestMultiRun* class. In this example, the MultiRunFrame can be toggled using the '*executeWithGUI*' boolean field. Here's a screenshot of the MultiRunFrame GUI: + +![MultiRunFrame](https://www.microsimulation.ac.uk/wp-content/uploads/page/MultiRunFrame.png) + +There are two abstract methods in the abstract *MultiRun* class – *nextModel*() and *setupRunLabel*() – that must be overridden by the concrete *TestMultiRun* class. + +The *nextModel*() method should return a boolean which determines whether another new simulation should be launched. In the *TestMultiRun* example, the simulation is repeated a number of times equal to *numberOfRepeatedRuns* for each value *k\***numberOfAgents* of the population size, with *k =* 1*…K*, but the boolean could instead depend, for instance, on whether a loop through a more complicated set of model parameters has terminated, signalling the completion of a parameter search experiment. + +```java +@Override +public boolean nextModel() { + + // Update the values of the parameters for the next experiment + counter++; + + if(counter > numberOfRepeatedRuns) { + numberOfAgents *= 10; // Increase the number of agents by a factor of 10 + // for the next experiment + counter = 1L; // Reset counter + } + + // Define the continuation condition + if(numberOfAgents < maxNumberOfAgents) { // Stop when the numberOfAgents goes above // maxNumberOfAgents + return true; + } + else return false; +} +``` + +The *setupRunLabel*() method provides a unique MULTI_RUN_ID name (a string) for each simulation run. 
This can be the current run number, as provided by *counter.toString()*, or a more "telling" label, as: + +```java +@Override +public String setupRunLabel() { + return numberOfAgents.toString() + " agents, count: " + counter.toString(); +} +``` + +The labels are stored in the output database in the JAS_EXPERIMENT output table. diff --git a/documentation/wiki/developer-guide/jasmine/project-structure.md b/documentation/wiki/developer-guide/jasmine/project-structure.md new file mode 100644 index 000000000..b7809a1d6 --- /dev/null +++ b/documentation/wiki/developer-guide/jasmine/project-structure.md @@ -0,0 +1,30 @@ +# Project Structure + +In the JAS-mine architecture, agents are organized and managed by components called managers. As already mentioned, there are three types of managers in this architecture: [Model, Collector and Observer](https://www.microsimulation.ac.uk/jas-mine/resources/focus/model-collector-observer/). Models serve to build artificial agents and objects, and to plan the time structure of events. Collectors are managers that build data structures and routines to calculate (aggregate) statistics dynamically, and that build the objects used for data persistence. The definition of a Collector's schedule specifies the frequency of statistics updating and agent sampling, and consequent storage in the output database. Observers are managers that serve to build graphical widget objects that indicate the state of the simulation in real time, and define the frequency with which to update these objects. + +JAS-mine allows multiple Models (and multiple Collectors and Observers) to run simultaneously, since they share the same [scheduler](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/the-model-and-the-schedule/). This allows for the creation of complex structures where agents of different Models can interact. Each Model is implemented in a separate Java class that creates the objects and plans the schedule of events for that Model. 
Model classes require the implementation of the `SimulationManager` interface, which implies the specification of a `buildObjects` method to build objects and agents, and a `buildSchedule` method for planning the simulation events. Analogously, Collector classes must implement the `CollectorManager` interface, and Observer classes must implement the `ObserverManager` interface. + +When a new JAS-mine project is created using the [JAS-mine plugin for Eclipse IDE](https://www.microsimulation.ac.uk/jas-mine/how-to-create-and-run-a-new-jas-project-using-eclipse), several packages are created: + +* **data**: a package containing the classes that describe the structure of coefficients, parameters and agent population tables contained in the database to be loaded by the ORM. When using Excel files to specify input data, no specific classes need to be included in this package. +* **model**: a package containing the classes that specify the model structure; in particular, it contains the Model manager class(es) and the class(es) of agents that populate the simulation. +* **model.enums**: a subpackage containing the definition of the enumerations used (if any). Enumerations specify a set of predefined values that a property can assume. These values might be categorical (strings, e.g. sex), quantitative (discrete numbers, e.g. age) or even objects with their set of characteristics and properties (e.g. a predefined set of banks to which a firm can be linked). The ORM detects that a value is an enumeration when the property is declared with the annotation `@Enumerated` (see below). Through enumerations the ORM automatically manages reading/writing operations in both text and numerical format. +* **experiment**: a package containing the classes that deal with running the simulation experiment(s); it contains, in particular, the Start class where the main method and the type of the experiment (interactive vs. batch mode, single run vs. multiple runs) are defined. 
The package might also contain one or more Collector and/or Observer manager classes for online statistics collection and display, and a MultiRun class that manages repeated runs for parameter exploration. +* **algorithms**: a package containing classes that implement algorithms for determining events and applying processes to the agents. These implementations, in a cooperative effort of users, are potential candidates to extend the set of standard functions included in the JAS-mine libraries. + +In addition to sources (stored in the *src* folder), the project also contains two folders for data input-output. The input folder contains data and parameters in MS Excel or H2 embedded formats. The output folder contains the output of different simulation experiments and is initially empty. At the beginning of each run, JAS-mine creates a sub-folder that is labelled automatically with an appropriate time stamp, so as to uniquely identify the experiment (e.g. 20141218151116, for experiments initiated on the 18th December 2014, at 16 seconds after 3.11pm). The subfolder contains a copy of all the input files and an empty output database, with the same structure as the input database, as defined by the annotations added to the model classes. Coherence between the input database (if any), the output database and the classes representing the agents in the simulation (known as entity classes) is guaranteed by the ORM. + +By default, JAS-mine executes the simulations in embedded mode: the [databases](https://www.microsimulation.ac.uk/jas-mine/concept/io) are modified directly by the JDBC driver included in JAS-mine. The standard database uses an H2 database engine. Other databases supporting embedding can be used, such as Microsoft Access, Hypersonic SQL, Apache Derby, etc. + +Two additional folders are created, labelled *target* and *libs*, with technical content of no immediate interest to the modeller. 
The root folder also contains a *pom* (**project object model**) file, which contains information on the JAS-mine version used for the project. Apache [Maven](http://maven.apache.org/), an open source software project management and comprehension tool uses this information to manage all the project's build, reporting and documentation. In particular, by specifying in the pom file the desired release for each library used (including the JAS-mine libraries), [Maven automatically downloads](https://www.microsimulation.ac.uk/jas-mine/resources/tutorials/how-to-update-jasmine/) the relevant libraries from the appropriate repositories. This implies that each JAS-mine project has its own copy of all the libraries used, ensuring that the project is self-contained and that it keeps working exactly as intended even when new versions of the libraries are released (and even if backward compatibility is not respected). + +| **Folder name** | **contains** | +| --- | --- | +| input | data and parameters | +| output | initially empty | +| src | all Java classes | +| target | compiled classes, JARs, etc. | +| libs | external libraries (if any) | +| (root) | pom.xml | + +**Table: the JAS-mine file structure** \ No newline at end of file diff --git a/documentation/wiki/developer-guide/jasmine/start-class.md b/documentation/wiki/developer-guide/jasmine/start-class.md new file mode 100644 index 000000000..bd36102b0 --- /dev/null +++ b/documentation/wiki/developer-guide/jasmine/start-class.md @@ -0,0 +1,38 @@ +# The Start Class + +In a JAS-mine project, the *Start* class serves to initialize and run the JAS-mine simulation engine and to define the list of models to be used. 
The *Start* class is designed to handle two types of situations: + +* performing a single run of the simulation in **interactive mode**, through the creation of a Model and related *Collectors* and *Observers*, with their GUIs; +* performing a single run of the simulation in **batch mode**, through the creation of the *Model* and possibly the *Collectors*; this involves managing parameter setup, model creation and execution directly, and is aimed at capturing only the simulation's numerical output. + +Note that in order to run the simulation many times, it is necessary to use the MultiRun class instead of the Start class. For more information, see this tutorial. + +The *Start* class must implement the *ExperimentBuilder* interface, which defines the *buildExperiment*() method. This method should create managers and add them to the JAS-mine engine. In the example below, a model called *DemoModel* is created and run in interactive mode: + +```java +public static void main(String[] args) { + + boolean showGui = true; + SimulationEngine engine = SimulationEngine.getInstance(); + MicrosimShell gui = null; + if (showGui) { + gui = new MicrosimShell(engine); + gui.setVisible(true); + } + engine.setBuilderClass(StartDemo.class); + engine.setup(); + +} + +@Override +public void buildExperiment(SimulationEngine engine) { + + DemoModel model = new DemoModel(); + PersonsCollector collector = new PersonsCollector(model); + PersonsObserver observer = new PersonsObserver(model, collector); + engine.addSimulationManager(model); + engine.addSimulationManager(collector); + engine.addSimulationManager(observer); + +} +``` \ No newline at end of file diff --git a/documentation/wiki/developer-guide/jasmine/updating-jasmine.md b/documentation/wiki/developer-guide/jasmine/updating-jasmine.md new file mode 100644 index 000000000..a7f6994a8 --- /dev/null +++ b/documentation/wiki/developer-guide/jasmine/updating-jasmine.md @@ -0,0 +1,40 @@ +# Updating JAS-mine + +# 1. 
Using Apache Maven + +The easiest way to update the JAS-mine libraries in your project is to use Apache Maven. Maven now comes pre-installed with most IDEs. Details slightly change based on the IDE, the instructions below refer to Eclipse (version Luna). + +Open the `pom.xml` file in Eclipse and go to the Dependencies tab. Select the libraries to update. Click on the Manage button. + +![JAS-mine dependencies](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-dependencies.png) + +The dependency will now display on the right column, under the "Dependency Management" heading: + +![JAS-mine dependencies managed](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-dependencies-managed.png) + +Select it, and click on the Properties button. Then, update the version and press OK. + +![JAS-mine dependency properties](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-dependency-properties.png) + +You should now see the new version of the JAS-mine library in the dependencies list: + +![JAS-mine dependencies changed](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-dependencies-changed.png) + +To update the .jars in the project, you may have to get Maven to update them. This is done by right clicking on the project in Eclipse's 'Package Explorer' window, then choosing the 'Maven / Update Project' menu, as in the screenshot below. Click OK on the window that pops up, and Maven should automatically download the new JAS-mine libraries and add them to the project. + +![JAS-mine maven update](https://www.microsimulation.ac.uk/wp-content/uploads/documentation/JAS-mine-maven-update.png) + + +# 2. Manual update + +An alternative and more involved way to update the JAS-mine libraries is to manually update an existing project to a new version of JAS-mine. 
This is done by manually downloading the new `JAS-mine-core-with-dependencies` and `JAS-mine-gui-with-dependencies` .jar files from the [JAS-mine download area](https://sourceforge.net/projects/jas-mine/files/Libraries/), and referring to them in your project. In order to do it, right-click on the project you want to update. Select Properties, then go to the Java Build Path tab on the vertical menu and on the Libraries tab on the horizontal menu. Select the old JAS-mine .jars, and remove them. After removal, no JAS-mine libraries should be present in the Libraries tab, as in the screenshot below: + +![JAS-mine build path](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-build-path.png) + +After [downloading](https://github.com/jasmineRepo) the JAS-mine libraries, copy these files to the 'libs' folder in the Eclipse project (create a 'libs' folder if there isn't one in the project). Then select both JAS-mine .jar files and right click, select 'Build Path / Add To Build Path': + +![JAS-mine add build path](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-add-build-path.png) + +You can check the JAS-mine .jars are on the build path by right-clicking on the project and selecting 'Build Path / Configure Build Path'. Check the new .jars are there in the Libraries tab. Note that your IDE should no longer show any Errors in your project related to missing JAS-mine classes. You should see the following: + +![JAS-mine build path configured](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-build-path-1.png) \ No newline at end of file diff --git a/documentation/wiki/developer-guide/working-in-github.md b/documentation/wiki/developer-guide/working-in-github.md new file mode 100644 index 000000000..a4b7c2086 --- /dev/null +++ b/documentation/wiki/developer-guide/working-in-github.md @@ -0,0 +1,123 @@ +# Working in GitHub + +# 1. 
Introduction + +This page explains the steps needed to make changes to SimPaths, implement them correctly in the code, and commit them via GitHub (_i.e._, make them available to other users). + +**Requirements** +- GitHub account +- Java Development Kit (JDK) +- IDE (Integrated Development Environment) + +In the following sections, explanatory screenshots are presented from both the GitHub browser and GitHub Desktop. While the latter is not required, GitHub Desktop provides a very user-friendly graphical user interface (GUI). It clearly visualises commits, branches, changes, and merge conflicts, and it is ideal for beginners or those who prefer not to use the command line. Additionally, it is designed for a quick setup and a seamless GitHub account integration. Thus, for the sake of contributing to SimPaths development, GitHub Desktop is a complete tool. However, it is not necessary; the user can entirely operate with the browser version in combination with the IDE or any other Git-integrated tool, _e.g._, GitKraken (GUI) or Git CLI (Command Line). + +As an IDE, this guide uses [IntelliJ IDEA](https://www.jetbrains.com/idea/) (free Community edition), which is also our recommendation (especially for beginners). A good alternative is [Eclipse](https://eclipseide.org/), whose GUI is less intuitive but which is fully open-source and highly extensible. Other alternatives exist, including [Theia](https://theia-ide.org/), a rapidly developing AI-native open-source cloud and desktop IDE. +Please ensure that your GitHub account is active and already connected to the chosen IDE (and GitHub Desktop, if you wish to use it). + + +# 2. Branches and Forks in GitHub Repositories + +A GitHub repository (repo) is characterised by _branches_ and _forks_. While they are both tools for parallel work and development, they operationally differ. +A branch is a parallel version of the code within the same repository. 
To integrate changes made in a branch into another, it suffices to merge it back into the original branch via a pull request (PR) or direct merge. +A fork is a copy of an entire repository (including its history) under a different GitHub account. It creates a completely separate project, where changes can be made without affecting the original repo. To integrate these changes into the original repository, a PR is submitted to the repo owners/maintainers. Once the PR is approved, the changes are merged into the target branch of the original repo. + + +# 3. SimPaths GitHub Repository + +SimPaths code is stored in a public [GitHub repo](https://github.com/centreformicrosimulation/SimPaths). The operative branches are `main`, which contains the most stable release, and `develop`, where modifications and updates are implemented. As outlined in the diagram below, to make changes in SimPaths, users are requested to: 1) fork the original repo under their GitHub account; 2) carry out all the modifications on a new branch originated from the `develop` branch of their forked repository; 3) commit and send a pull request to the maintainers. + +![image](https://www.dropbox.com/scl/fi/0dhv5z8rbcqfwi58khjgj/scheme.png?rlkey=dk735e2nrcz35pqltsxfkvmgm&raw=1) + +These steps are detailed below. + +## 3.1 Forking SimPaths Repo + +1. On the [SimPaths repo homepage](https://github.com/centreformicrosimulation/SimPaths) (see below), click on the top-right button "Fork". When hovering over it, the message _Fork your own copy of centreformicrosimulation/SimPaths_ will automatically appear. + +![image](https://www.dropbox.com/scl/fi/ql0ac9lpfc7olpttjidxv/Capture-d-cran-2025-07-16-15.54.25.png?rlkey=mlwykxxixgbxyvkihkoaw191l&st=ientbqmv&raw=1) + +2. Once clicked on "Fork," the following page will open. It is recommendable to give a distinguishable name to the repository (_e.g._, "SimPathsFork"). 
Regardless of the name, please ensure that the box _Copy the_ `main` _branch only_ is unticked (as in the image below). Then, click on the green button "Create fork". + +![image](../jasmine-reference/collection-filters.md) + +After the fork is created, a page identical to the SimPaths repo homepage will open automatically. The only difference is that, instead of the white-and-purple SimPaths logo, there will be the user's GitHub account icon/picture, with text below saying _forked from centreformicrosimulation/SimPaths_. This confirms the creation of a copy of the entire SimPaths repo under the user's GitHub account. + +3. At this point, the user is ready to clone the code by clicking on the green button "<> Code", and then on the icon with the two overlapping squares to the right of the url (see image below). When hovering over it, an automatic message _Copy url to clipboard_ will appear, which will change to _Copied!_ once clicked. + +![image](../jasmine-reference/regression-library.md) + +4. Now that the link is copied, the user is ready to open the cloned repository. This can be done either in the IDE (IntelliJ IDEA, in our case) or on GitHub Desktop. In both cases, expect the cloning of the repository to take a few minutes. + +
+ +**IntelliJ IDEA** +On the IntelliJ IDEA homepage, there are three buttons at the top-right corner. The user should click on the rightmost "Clone Repository", which will open the window below. +In the default option tab "Repository URL", it is sufficient to paste the copied url in the corresponding "URL:" box. The "Directory:" field will be pre-filled automatically, but the user can change it. In particular, they should make sure that the name identifies the forked repository (_e.g._, SimPathsFork). +Alternatively, if the user has connected their GitHub account to the IDE, by clicking on the option "GitHub" (the second in the left column), the repository will automatically appear in the list of the user's repositories and can be selected from there. + +![image](https://www.dropbox.com/scl/fi/scw9ycgx64vg7yrpap20l/Capture-d-cran-2025-07-16-17.17.40.png?rlkey=usmv7q2xu7w4uo0zhewsyz73t&st=ve8m0oxw&raw=1) + +Once cloned, the repository will appear in the Projects list of IntelliJ IDEA, where it can be opened by simply clicking on it. After opening the project, check whether the **Maven** tool window (Maven icon) is visible in the right-hand toolbar. If the Maven tool window is not present, import the Maven project manually:\ + (1) Go to File → Project Structure → Modules \ + (2) Click `+` → Import Module\ + (3) Select the pom.xml file in the local _SimPaths_ repository\ + (4) Complete the **Import from Maven** wizard using the default settings\ +After the import completes, the Maven tool window should appear and the project will be correctly configured. + +The landing page will be as the one below. By default, the current branch is set to `main`, from which the user should switch to `develop` by clicking on it and then "Checkout" (see image below). 
+ +![image](https://www.dropbox.com/scl/fi/q2p7hhkhqa36g4pjgxev2/Capture-d-cran-2025-07-17-11.42.05.png?rlkey=xk1tl7a073clo9kwp9yte3bku&st=jjz7svem&raw=1) + + +**GitHub Desktop** +On the GitHub Desktop homepage, select "Clone Repository...". Depending on the operating system and whether it is the first time GitHub Desktop is opened, this option can be selected in different ways. For example, at the first use of GitHub Desktop on macOS, the option would appear (second) on a main list as "Clone a Repository from the Internet...". Regardless, when clicking on it, the following tab will appear, from which the user can select the forked repository called "user\_name/SimPaths". In the Local Path tab, it is necessary to give a name that is not already attributed to any other repository (_e.g._, "SimPathsFork"). For example, if the SimPaths repository has already been cloned without forking it, say, to test the model, the user will not be able to use simply "SimPaths" as a name. Once this is sorted out, the user can clone the repository by clicking on the blue "Clone" button: + +![image](https://www.dropbox.com/scl/fi/ok4j3mp9hvlugrivtzdld/Capture-d-cran-2025-07-17-10.29.17.png?rlkey=jd43c3de2n8k1loh5sxlsqb3h&st=gv7r5b53&raw=1) + +At the end of the cloning, an automatic message will pop up saying: _This repository is a fork. How do you plan to use it?_. The user should select the first option "To contribute to the parent project", and then "Continue". The landing page will be as the one below. By default, the "Current Branch" is set to `main`, from which the user should switch to `develop`. While it should automatically be up to date, it is always recommended to "Fetch origin" (at the righthand side of menu tab) to be sure that the branch is aligned with the latest updates. It is also important to keep the repo's branches up-to-date. 
Namely, from the forked repo on the user's account in GitHub browser, the user must simply click on "Sync fork" on the right before starting to work on it to be sure the forked repo is up-to-date with the original one (see image below). + +![image](https://www.dropbox.com/scl/fi/4o61fg7cpdqs7u731ffpk/Capture-d-cran-2025-07-23-18.28.07.png?rlkey=770zypkcps95yz5oncv9o0zbj&st=3lrexyzs&raw=1) + +![image](https://www.dropbox.com/scl/fi/u7tqx50ets4iy3wrkldrh/Capture-d-cran-2025-07-17-11.21.50.png?rlkey=vwzh4zhqtfxgnjabb5gzy00pa&st=veeq21ls&raw=1) + +Now that the forked repository has been created ("SimPathsFork" in our case), the user can open the project in the IDE. On the IntelliJ IDEA homepage, there are three buttons at the top-right corner. The user should click on the second one "Open". The window that opens allows selection of the folder of the forked repository, which will be located at the Local Path selected during the cloning. In our case, reading from the image above "Clone Repository", it will be /Users/UserName/Documents/GitHub/SimPathsFork. At this point, it is sufficient to select the folder and click on the blue button "Open". The user will land automatically on the project open on the `develop` branch. + +At the end of these four steps, the user will have their forked repository under their GitHub account. + +## 3.2 Making Changes +1. From the develop branch in the new forked repository, the user should create a new branch devoted to the modifications they wish to make. Again, this can be done either from GitHub Desktop or directly from the IDE (IntelliJ IDEA). In any case, the branch should be named according to our [naming convention](https://github.com/centreformicrosimulation/SimPaths?tab=readme-ov-file#branch-naming-conventions). The first part of the name should indicate why that branch was created. For example, if the purpose is to fix a bug, this first part could be called "bug-fix". 
The following parts should get into the details of the broader objective of the branch indicated in the first part. Continuing with the same example, if the bug to fix is related to a specific class of the model, the second part of the name could be the name of the class. Every part of the name should be separated by a "/" symbol. Therefore, in our example, the branch would be called "bug-fix/class_name". While other details can be added as additional parts of the branch name (separated by the "/" symbol), we suggest keeping the branch names relatively short. +**IntelliJ IDEA** +In our forked project ("SimPathsFork" in our case), follow the exact same path illustrated to switch branch from `main` to `develop` (Section 3.1 "Forking SimPaths Repo"; point 4; second image). In this case, however, in the drop-down menu, select "+ New Branch...". In the small window that opens, rename the branch, make sure that the option "Checkout branch" is ticked, and then click on "Create". The user will land automatically on the new branch of the forked repository. +**GitHub Desktop** +On the GitHub Desktop homepage of the forked repository ("SimPathsFork" in our case), select "Current Branch" (second entry of the menu tab at the top) and then "New Branch". Ensure to _Create branch based on..._ "develop", as shown in the image below, and rename the branch properly (here, assuming to make a change that entails the addition of a new "ethnicity" variable, it is called "add-variable/ethnicity"). Then click the "Create Branch" blue button. + +![image](../jasmine-reference/statistical-package.md) + +When opening the forked project on IntelliJ IDEA, it will be already set on the new `add-variable/ethnicity` branch. +Whether it is directly via IntelliJ IDEA or through GitHub Desktop, at the end of this first step, the user will be set in the IDE on their new branch of their forked repository, which is the starting point to make any change or modification. + +2. 
At this point, the user is free to make the desired changes. When they are done, it is of paramount importance that the model is tested after the editing. If the model compiles and runs correctly without error messages, the user can proceed to the next and final step. + +## 3.3 Committing Changes +Once all the modifications have been implemented and the model has been tested to function correctly, the updated version of the model can be made available to everyone. This is done via a two-step procedure. First, the changes must be committed, _i.e._, "sent" and fully integrated into the corresponding branch.[1](#footnote-1) After the commit, the newly published branch will contain all these modifications. Second, the branch must be merged, via a pull request, with the `develop` branch of the public repo centreformicrosimulation/SimPaths, so that everyone may access the updated version of the model. +As in the previous points, this can be done either from GitHub Desktop or directly from the IDE (IntelliJ IDEA). For the sake of simplicity, in this case, only the procedure using GitHub Desktop is presented. + +1. The GitHub Desktop homepage should look as follows: + +![image](https://www.dropbox.com/scl/fi/yj6g7s2tv9svq56rhk0i6/Capture-d-cran-2025-07-23-17.49.27.png?rlkey=tl4p19noc3i0725gviybs4v0z&st=4wfgmk11&raw=1) + +In the left column, all the modifications that have been made are listed. If we click on any of them, the actual changes appear on the right-hand side of the window (red: what has been deleted; green: what has been added). Before committing the changes, it is very important to: i) untick any modifications that entail the upload of data (data cannot be uploaded on GitHub for confidentiality reasons); ii) add a relevant summary and description to the commit in the bottom-left corner. When both these operations are completed, the user is ready to commit the changes by clicking the blue button at the end of the page "Commit X files to branch-name". 
After committing, the branch should be published by clicking on "Publish branch" (third entry of the menu tab on the top). + +2. At this point, the user should switch to GitHub on their browser and access the [Pull requests](https://github.com/centreformicrosimulation/SimPaths/pulls) section of centreformicrosimulation/SimPaths, where they shall select the green button "New pull request" on the right. Before being able to send the pull request, the user will be asked to choose the branches to compare, as illustrated below: + +![image](https://www.dropbox.com/scl/fi/fqcpwa93rqgyyf6gaoeid/Capture-d-cran-2025-07-23-18.09.10.png?rlkey=qycxsye2t2pzk8cms4wwrrg7j&st=op2rc1t3&raw=1) + +On the left-hand side, the base repository should always be set to centreformicrosimulation/SimPaths, and the branch on `develop`. On the right-hand side, the user should select their forked repository and the branch where they made all the changes. At this point, the changes will automatically appear underneath and the user will be able to click on the green button on the right "Create pull request". In the window that opens, the title and description will be precompiled from the commit. If they are already self-explanatory, the user can simply click again on the green button on the right "Create pull request" to complete the operation. At this point, an automatic system of tests will be launched to run the model on the server and double-check it works, but no actions are required by the user. +The last step is simply to add one or more reviewers in the "Reviewers" tab at the top right of the page. The reviewer(s) will receive a notification and can review the changes committed before merging them into the centreformicrosimulation/SimPaths repository. + +# 4. Further changes + +Following the procedure presented in this page, any user should be able to carry out modifications to the model without any risk of jeopardising it. 
It is worth noting that, while Steps 2 and 3 must be followed for any new change, once the forked repository is created, it remains available for future use (_i.e._, it is not necessary to go through Step 1 again). + +[1] Committing changes only saves them locally; to "send" them to GitHub it is necessary to "push" changes. + diff --git a/documentation/wiki/figures/Chart Properties.png b/documentation/wiki/figures/Chart Properties.png new file mode 100644 index 000000000..757653b1e Binary files /dev/null and b/documentation/wiki/figures/Chart Properties.png differ diff --git a/documentation/wiki/figures/Charts.png b/documentation/wiki/figures/Charts.png new file mode 100644 index 000000000..1791402d8 Binary files /dev/null and b/documentation/wiki/figures/Charts.png differ diff --git a/documentation/wiki/figures/Output stream.png b/documentation/wiki/figures/Output stream.png new file mode 100644 index 000000000..01bdfa14f Binary files /dev/null and b/documentation/wiki/figures/Output stream.png differ diff --git a/documentation/wiki/figures/SimPaths GUI.png b/documentation/wiki/figures/SimPaths GUI.png new file mode 100644 index 000000000..369771f9b Binary files /dev/null and b/documentation/wiki/figures/SimPaths GUI.png differ diff --git a/documentation/wiki/figures/SimPaths parameters.png b/documentation/wiki/figures/SimPaths parameters.png new file mode 100644 index 000000000..96a4eab5a Binary files /dev/null and b/documentation/wiki/figures/SimPaths parameters.png differ diff --git a/documentation/wiki/figures/SimPaths-Buttons.png b/documentation/wiki/figures/SimPaths-Buttons.png new file mode 100644 index 000000000..65dd272c5 Binary files /dev/null and b/documentation/wiki/figures/SimPaths-Buttons.png differ diff --git a/documentation/wiki/figures/SimPaths-Chart-Zoom.png b/documentation/wiki/figures/SimPaths-Chart-Zoom.png new file mode 100644 index 000000000..05bec29c0 Binary files /dev/null and b/documentation/wiki/figures/SimPaths-Chart-Zoom.png differ 
diff --git a/documentation/wiki/getting-started/data/index.md b/documentation/wiki/getting-started/data/index.md new file mode 100644 index 000000000..654e51dcb --- /dev/null +++ b/documentation/wiki/getting-started/data/index.md @@ -0,0 +1,35 @@ +# Input Data + +SimPaths uses three types of data as input: + +1. The initial population to be evolved over time. [Available here](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input/InitialPopulations/training) +2. Donor populations used to impute the effects of tax and benefit policy. [Available here](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input/EUROMODoutput/training) +3. Estimated parameters governing transition probabilities assumed by the model. [Available here](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input) + +Training data are provided for the first two of these data sets, while 'release' data are provided for the third data set. + +The model has been designed to draw the initial population from data reported by the UK Household Longitudinal Study (UKHLS). The UKHLS, (sometimes referred to as Understanding Society), is the successor to the British Household Panel Survey, and is the principal general-purpose panel survey administered in the UK. Multiple initial populations are derived from the UKHLS, corresponding to different years of data reported by the survey (from 2011 to 2017), and used for model validation. The donor populations for tax and benefit imputations are derived from UKMOD and are based on data reported by the Family Resources Survey (FRS). These data include a wide range of benefit unit characteristics in addition to tax and benefit payments. SimPaths imputes tax and benefit payments from these data by matching simulated individuals to individuals described by donor populations. Parameters for the UK have been estimated on UKHLS data, Waves 1 to 8, and FRS (labour supply and social care, various years). 
+ +Training data are provided for the initial population (1) and the donor populations (2) because these data are drawn from publicly available sources that are subject to limitations imposed by the respective data providers. The following sections describe how to generate 'release' data for these two data sets. + + +# 2. Obtain data for the initial population (for the UK) + +In addition to training data, the model comes supplied with a set of Stata do files that have been written to extract input data from the UKHLS. These do files can be found in the model directory: `SimPaths/input/InitialPopulations/compile/`. + +1. Obtain the most recent version of the UKHLS survey from the [UK Data Service](https://ukdataservice.ac.uk/) (SN6614, in STATA's tab format). Further to this, you need to obtain the most recent version of the Wealth and Assets Survey (WAS) (SN7215, in STATA's tab format). +2. Use Stata to open file 00_master.do, and edit global variables at the top of the file, save and run. +3. Copy the csv files generated following (2) to model directory: `SimPaths/input/InitialPopulations/`. +4. Run SimPathsStart, and select option "Load new input data for starting populations" from the Start-up Options window. + + + +# 3. Obtain data for tax-benefit donors (for the UK) + +SimPaths is designed to read in data describing tax-benefit payments generated by UKMOD. + +1. Obtain a copy of the most recent version of UKMOD from the [CeMPA website](https://www.microsimulation.ac.uk/ukmod/access/). +2. Obtain the most recently available "b" series of input data provided for UKMOD as described on the CeMPA website. +3. Run desired system years described by (1) UKMOD, using the (2) "b" series dataset - note that the same input data set should be used for all system years. System runs can be performed directly in UKMOD or calling UKMOD from STATA, R, or Python using the respective connectors. +4. 
Copy the files generated following (3) to model directory: `SimPaths/input/EUROMODoutput/`. Please note that it is required to provide UKMOD output files which include the base price year used by SimPaths (currently 2015). If no UKMOD output file is provided for the base price year, the initial database setup will fail. +5. Run SimPathsStart, and select option "Load new input data for tax and benefit systems" from the Start-up Options window. \ No newline at end of file diff --git a/documentation/wiki/getting-started/data/initial-population-uk.md b/documentation/wiki/getting-started/data/initial-population-uk.md new file mode 100644 index 000000000..cea0af366 --- /dev/null +++ b/documentation/wiki/getting-started/data/initial-population-uk.md @@ -0,0 +1,8 @@ +# Initial Population (UK) + +In addition to training data, the model comes supplied with a set of Stata do files that have been written to extract input data from the UKHLS. These do files can be found in the model directory: `SimPaths/input/InitialPopulations/compile/`. + +1. Obtain the most recent version of the UKHLS survey from the [UK Data Service](https://ukdataservice.ac.uk/) (SN6614, in STATA's tab format). Further to this, you need to obtain the most recent version of the Wealth and Assets Survey (WAS) (SN7215, in STATA's tab format). +2. Use Stata to open file 00_master.do, and edit global variables at the top of the file, save and run. +3. Copy the csv files generated following (2) to model directory: `SimPaths/input/InitialPopulations/`. +4. Run SimPathsStart, and select option "Load new input data for starting populations" from the Start-up Options window. 
diff --git a/documentation/wiki/getting-started/data/tax-benefit-donors-uk.md b/documentation/wiki/getting-started/data/tax-benefit-donors-uk.md new file mode 100644 index 000000000..f40244801 --- /dev/null +++ b/documentation/wiki/getting-started/data/tax-benefit-donors-uk.md @@ -0,0 +1,9 @@ +# Tax-Benefit Donors (UK) + +SimPaths is designed to read in data describing tax-benefit payments generated by UKMOD. + +1. Obtain a copy of the most recent version of UKMOD from the [CeMPA website](https://www.microsimulation.ac.uk/ukmod/access/). +2. Obtain the most recently available "b" series of input data provided for UKMOD as described on the CeMPA website. +3. Run desired system years described by (1) UKMOD, using the (2) "b" series dataset - note that the same input data set should be used for all system years. System runs can be performed directly in UKMOD or calling UKMOD from STATA, R, or Python using the respective connectors. +4. Copy the files generated following (3) to model directory: `SimPaths/input/EUROMODoutput/`. Please note that it is required to provide UKMOD output files which include the base price year used by SimPaths (currently 2015). If no UKMOD output file is provided for the base price year, the initial database setup will fail. +5. Run SimPathsStart, and select option "Load new input data for tax and benefit systems" from the Start-up Options window. \ No newline at end of file diff --git a/documentation/wiki/getting-started/environment-setup.md b/documentation/wiki/getting-started/environment-setup.md new file mode 100644 index 000000000..d121cab7c --- /dev/null +++ b/documentation/wiki/getting-started/environment-setup.md @@ -0,0 +1,26 @@ +# Environment Setup + +!!! warning "In progress" + This page is under development. Contributions welcome — + see the [Getting Started](index.md) overview. 
+ +## Requirements + +- Java Development Kit (JDK) 11 or later +- Apache Maven 3.6 or later +- Git + +## Cloning the repository + +```bash +git clone https://github.com/centreformicrosimulation/SimPaths.git +cd SimPaths +``` + +## Building the project + +```bash +mvn clean install -DskipTests +``` + +Refer to the [Working in GitHub](../developer-guide/working-in-github.md) guide for the full development workflow. diff --git a/documentation/wiki/getting-started/first-simulation.md b/documentation/wiki/getting-started/first-simulation.md new file mode 100644 index 000000000..1c79c9b09 --- /dev/null +++ b/documentation/wiki/getting-started/first-simulation.md @@ -0,0 +1,5 @@ +# Running Your First Simulation + +!!! warning "In progress" + This page is under development. Contributions welcome — + see [Single Runs](../user-guide/single-runs.md) in the User Guide for current guidance. diff --git a/documentation/wiki/getting-started/index.md b/documentation/wiki/getting-started/index.md new file mode 100644 index 000000000..7597b2992 --- /dev/null +++ b/documentation/wiki/getting-started/index.md @@ -0,0 +1,20 @@ +# Getting Started + +This section walks you through everything needed to run SimPaths for the first time. + +## Prerequisites + +SimPaths is a Java project. Before running simulations you need: + +- Java Development Kit (JDK) 11 or later +- Maven (for dependency management) +- The required input data files (see [Input Data](data/index.md)) + +## Steps + +1. [Environment Setup](environment-setup.md) — install dependencies and clone the repository +2. [Input Data](data/index.md) — obtain the required UK input datasets +3. [Running Your First Simulation](first-simulation.md) — launch a baseline run +4. [Video Tutorials](video-tutorials.md) — guided walkthroughs + +If you are running multiple or scenario-based simulations, continue to the [User Guide](../user-guide/index.md) after completing these steps. 
diff --git a/documentation/wiki/getting-started/video-tutorials.md b/documentation/wiki/getting-started/video-tutorials.md new file mode 100644 index 000000000..1d8bb07ae --- /dev/null +++ b/documentation/wiki/getting-started/video-tutorials.md @@ -0,0 +1,6 @@ +# Video Tutorials + +* [Executable](https://panopto.essex.ac.uk/Panopto/Pages/Viewer.aspx?id=02b8c98b-82b7-4a44-8a1e-b29600c9c1b5) Installing and running the model from the executable. +* [Technical set-up](https://panopto.essex.ac.uk/Panopto/Pages/Viewer.aspx?id=52fb9b12-1648-4f94-bb2b-b28900ec32ca) Setting up the work environment. +* [Model structure](https://panopto.essex.ac.uk/Panopto/Pages/Viewer.aspx?id=79131b43-1f7b-4a06-9bdf-b29600c9c24b) Understanding the model structure. + diff --git a/documentation/wiki/index.md b/documentation/wiki/index.md new file mode 100644 index 000000000..6e350a1b3 --- /dev/null +++ b/documentation/wiki/index.md @@ -0,0 +1,115 @@ +--- +hide: + - navigation + - toc +--- + +
+ +# SimPaths + +

Open-source dynamic microsimulation for life course modelling — projecting careers, families, health, and finances across populations.

+ +
+ ✓ Open Source + 🌍 5 Countries + ☕ Java / JAS-mine + 🔬 UKRI HealthMod +
+ + + +
+ +--- + +## About SimPaths + +SimPaths builds upon **JAS-mine**, an open-source Java platform designed for discrete-event simulations. Models currently exist for the **UK, Greece, Hungary, Italy, and Poland**. + +
+ 🇬🇧 United Kingdom + 🇬🇷 Greece + 🇭🇺 Hungary + 🇮🇹 Italy + 🇵🇱 Poland +
+ +--- + +## Explore the Documentation + +
+ +
+🗺 + +### Overview + +What SimPaths models and why — architecture, modules, parameterisation, and country variants. + +[Explore Overview →](overview/) +
+ +
+ + +### Getting Started + +Set up your environment, load input data, and run your first simulation in minutes. + +[Get Started →](getting-started/) +
+ +
+ + +### User Guide + +Single and multi-run simulations, the graphical interface, parameter editing, and uncertainty analysis. + +[View User Guide →](user-guide/) +
+ +
+💻 + +### Developer Guide + +JAS-mine architecture, SimPaths internals, and step-by-step how-to guides for extending the model. + +[Developer Guide →](developer-guide/) +
+ +
+📚 + +### JAS-mine Reference + +Statistical packages, alignment and matching libraries, regression tools, and database utilities. + +[JAS-mine Reference →](jasmine-reference/) +
+ +
+📊 + +### Research & Validation + +Published papers, working papers, and model validation results. + +[Research →](research/) +
+ +
+ +--- + +!!! tip "New to SimPaths?" + Start with the [Overview](overview/) to understand what the model does, then follow the [Getting Started](getting-started/) guide to run your first simulation. + +For papers using SimPaths, see the [Research](research/) page. For European country variants, visit the [SimPathsEU](https://github.com/centreformicrosimulation/SimPaths) repository. diff --git a/documentation/wiki/jasmine-reference/alignment-library.md b/documentation/wiki/jasmine-reference/alignment-library.md new file mode 100644 index 000000000..03298a663 --- /dev/null +++ b/documentation/wiki/jasmine-reference/alignment-library.md @@ -0,0 +1,170 @@ +# The JAS-mine Alignment Library + +Alignment is a technique widely used in (dynamic) microsimulation modelling to ensure that the simulated totals conform to some exogenously specified targets, or aggregate projections (Baekgaard, 2002; Klevmarken, 2002; Li and O'Donoghue, 2014). Alignment is a way to incorporate additional information which is not available in the estimation data. The underlying assumption is that the microsimulation model is a poor(er) model of the aggregate, but a good model of individual heterogeneity: by forcing the microsimulation outcomes to match the targets in a way that is as little distortive as possible, the microsimulation model is left with the task of distributing the totals in the population. In general, the above assumption is very dangerous and unwarranted, and alignment should be looked at with great suspicion. + +One important thing to note is that the processes to be aligned are executed at an individual level, while alignment always takes place at the population level. That is, individual outcomes or probabilities are determined for each individual based on the chosen econometric specification and the estimated coefficients. 
This in general leads to a mismatch between the simulated (provisional) totals and the aggregate targets, which can of course be assessed only at the population level. The alignment algorithm then directly modifies the individual outcomes or probabilities of occurrence. + +# 1. Common Arguments + +All alignment methods in JAS-mine require 4 arguments (see the code blocks below for examples on how to use the methods in a JAS-mine model): + +1. **collection**: a collection of individuals whose outcome or probability of an event has to be aligned (e.g. all the population); +2. **filter**: a filter to be applied to the collection (e.g. all females selected to divorce); +3. **AlignmentProbabilityClosure** or **AlignmentOutcomeClosure**: a piece of code that i) for each element of the filtered collection computes a probability for the event (in the case that the alignment method is aligning probabilities, as in the SBD algorithm) or an outcome (in the case that the alignment method is aligning outcomes), and ii) applies to each element of the filtered collection the specific instructions coming from the alignment method used. (In the case of multiple choice alignment such as Logit Scaling, an '**AlignmentMultiProbabilityClosure**' is used instead, which handles a set of probabilities for the many possible event outcomes.); +4. **targetShare** or **targetNumber**: the share or number of elements in the filtered collection that are expected to experience the transition. (In the case of multiple choice alignment, this is an array of targetShare proportions, containing the share for each potential outcome). + +We introduce the Multiple Choice alignment methods available in JAS-mine (section A), followed by a description of the binary alignment methods available (section B), concluding with an introduction to alignment with variable agent weightings (section C). + +# 2. 
Multiple Choice Alignment + +The earliest alignment techniques implemented in JAS-mine fall into the category of binary alignment. Version 3.2.0 of JAS-mine introduced the possibility of **multiple choice alignment** by implementing the Logit Scaling alignment method of Stephensen (2016). + +## 2.1 Logit Scaling (LS) + +**Logit Scaling** (LS) is an alignment technique that is theoretically optimal in that it minimizes the information loss i.e. distortion (as measured by the relative entropy) in the process of aligning probabilities to given targets. In addition, it is computationally efficient and quick to run. The method is implemented using the 'Bi-Proportional Scaling' algorithm that quickly converges to the solution of the problem. This involves representing the set of state probabilities for all individuals in a population as a two-dimensional matrix, with each row representing an individual 'i', and each column representing a particular state 'a' (the choice or outcome of the process to be aligned). The matrix undergoes an iterative process whereby: + +1. The sum of each column of probabilities is scaled to match the alignment target, which is the expected (mean) number of individuals in the state 'a' that the column represents; +2. The sum of each row is then scaled to equal 1 (as the sum of state probabilities should always equal 1, i.e. the individual must be in one particular state at any moment in time). + +Steps 1) and 2) are then repeated in sequence until the system converges. Not only does Logit Scaling alignment minimize the distortion to the probability distributions of the individuals while obtaining the alignment targets, but it has a number of useful features including the ability to retain zero probabilities (i.e. 
impossible events), a symmetric formulation where neither outcome (or choice) has a favoured status in the algorithm, and moreover the ability to handle more than just two choices (which the following alignment algorithms are all restricted to doing). Logit Scaling is the clear choice for any user wanting to perform multiple choice alignment in JAS-mine. + +### Example: Multiple Choice Logit Scaling + +Here is an example of how the multiple choice Logit Scaling alignment could be implemented in a JAS-mine model where there are three potential outcomes (note the last argument is an array containing three elements that sum to 1). + +```java +new LogitScalingAlignment().align( + + // collection + persons, + + // filter + new FemaleToDivorce(ageFrom, ageTo), + + // alignment probability closure for multiple choice alignment + new AlignmentMultiProbabilityClosure() { + + @Override + public double[] getProbability(Agent agent) { + return agent.getProb(); + } + + @Override + public void align(Agent agent, double[] alignedProbability) { + agent.setChoice(RegressionUtils.event(Choice.class, alignedProbability)); + } + }, + + new double[]{targetShareA, targetShareB, (1.- targetShareA - targetShareB)}); + +} +``` + +# 2. Binary Choice Alignment + +In addition to multiple choice Logit Scaling alignment, there are six binary alignment methods implemented in JAS-mine: binary Logit Scaling (LSb, described in its own subsection below) and the following five: + +1. Multiplicative Scaling (MS), +2. Sidewalk (SW), +3. Sorting by the difference between predicted probability and random number (SBD), and Sorting by the difference between logistic adjusted predicted probability and random number (SBDL), +4. Resampling (RS). + + +Implementation of MS, SW, SBD and SBDL is based on Li and O'Donoghue (2014) (Jinjing Li kindly provided the Stata code used in that paper), while implementation of RS closely follows Richiardi and Poggi (2014) and Leombruni and Richiardi (2006). 
+ +## 2.1 Multiplicative Scaling (MS) + +**Multiplicative scaling** involves undertaking an unaligned simulation using Monte Carlo techniques and then comparing the proportion of transitions with the external control total. The average ratio between the desired transition rate and the actual transition is used as a scaling factor for the simulated probabilities. The method ensures that the average scaled simulated probability is the same as the desired transition rate. The method, however, is criticized by Morrison (2006) as probabilities are not guaranteed to stay in the range 0-1 after scaling, though the problem is rare in practice as the multiplicative ratio tends to be small. + +## 2.2 Sidewalk (SW) + +**The Sidewalk** method was first introduced as a variance reduction technique, which was also used as an alternative to the random number based Monte Carlo simulation. It keeps a record of the accumulated probability from the first modelled binary outcome to the last. As long as there is a change of the integer part of the accumulated probability, the observation is assigned with an outcome value of 1. + +## 2.3 Sorting based alignment algorithms + +Sorting based alignment algorithms involve sorting of the predicted probability adjusted with a stochastic component, and selects desired number of events according to the sorting order: **SBD** sorts by the difference between the predicted probability and a random number in (0,1), while +**SBDL** sorts by a logistic transformation of the predicted probability. + + +Both SBD and SBDL introduce a significant distortion in the estimated probabilities and their use is deprecated. However, they are included for replication exercises. 
+ +## 2.4 Resampling (RS) + +**Resampling** involves drawing again the event, without altering the predicted probabilities, either for agents who have experienced the transition (if too many transitions have occurred) or for agents that have not experienced the transition (if too few events have occurred), until the target is reached. + +## 2.5 Binary Logit Scaling (LSb) + +Implementation of **binary Logit Scaling** (LSb) closely follows Stephensen (2016) and the description of the Bi-Proportional Scaling algorithm above, however it exploits the two-state property of the system to simplify the algorithm. + +## 2.6 Other Binary Alignment Algorithms + +Li and O'Donoghue (2014) analyse three other binary alignment algorithms: + +1. Sidewalk with nonlinear transformation (SNT), +2. The central limit theorem approach (CLT) and +3. Sorting by predicted probability (SBP). + +SNT and CLT have not been implemented yet in JAS-mine as they are relatively more complicated and run much slower than the other methods; SBP has not been implemented due to its theoretical shortcomings and poor empirical performances (see Li and O'Donoghue, 2014). + +## 2.7 Example: Binary Choice Alignment (SBD) + +```java +new SBDAlignment().align( + + // collection + persons, + + // filter + new FemaleToDivorce(ageFrom, ageTo), + + // alignmentProbabilityClosure + new AlignmentProbabilityClosure() { + + // i) compute the probability of divorce + @Override + public double getProbability(Person agent) { + return agent.computeDivorceProb(); + } + + // ii) determine what to do with the aligned probabilities + @Override + public void align(Person agent, double alignedProbability) { + boolean divorce = RegressionUtils.event(alignedProbability, SimulationEngine.getRnd()); + agent.setToDivorce(divorce); + } + }, + + // targetShare + divorceTarget +); +``` + +# 3. 
Alignment with Weighting + +Another new feature introduced in version 3.2.0 of JAS-mine is that alignment can now be done on an agent population where each agent carries a weighting that defines the number of individuals it represents. For example, an agent with a 'weighting' variable equal to 4 means that the agent should be considered to represent four individuals. These weightings need to be taken into account when alignment occurs as not all agents are considered to represent the same number of individuals. + +The JAS-mine alignment classes that allow for variable agent weightings rely upon the agent implementing the 'Weighting' interface, which means that the agent class contains a public method called 'getWeighting()'. + +### Available Weighted Alignment Classes + +The following alignment classes catering for variable agent weightings are available in JAS-mine: + +* **ResamplingWeightedAlignment** – the resampling alignment algorithm for agents implementing the Weighting interface, +* **LogitScalingWeightedAlignment** – the multiple choice Logit Scaling alignment algorithm for agents implementing the Weighting interface, +* **LogitScalingBinaryWeightedAlignment** – the binary choice Logit Scaling (LSb) alignment algorithm for agents implementing the Weighting interface. + + +# 4. References + +Baekgaard H (2002). “Micro-macro linkage and the alignment of transition processes: some issues, techniques and examples”. National Centre for Social and Economic Modelling (NATSEM) Technical paper No. 25. + +Klevmarken A (2002). “Statistical inference in micro-simulation models: incorporating external information”. Mathematics and Computers in Simulation, 59: 255-265. + +Leombruni R, Richiardi M (2006). "LABORsim: An Agent-Based Microsimulation of Labour Supply. An application to Italy." Computational Economics, vol. 27, no. 1, pp. 63-88 + +Li J, O'Donoghue C (2014). "Evaluating Binary Alignment Methods in Microsimulation Models". 
Journal of Artificial Societies and Social Simulation, 17(1): art. 15. + +Richiardi M, Poggi A (2014). "Imputing Individual Effects in Dynamic Microsimulation Models. An application to household formation and labor market participation in Italy." International Journal of Microsimulation, 7(2), pp. 3-39. + +Stephensen P (2016). "Logit Scaling: A General Method for Alignment in Microsimulation models." International Journal of Microsimulation, 9(3), pp. 89-102. + diff --git a/documentation/wiki/jasmine-reference/collection-filters.md b/documentation/wiki/jasmine-reference/collection-filters.md new file mode 100644 index 000000000..84f994005 --- /dev/null +++ b/documentation/wiki/jasmine-reference/collection-filters.md @@ -0,0 +1,54 @@ +# Collection Filters + +The most basic way to filter a collection (say, a list of workers) is by iterating over its elements and check whether the filtering condition (say, whether they are employed) applies: +```java +List employedWorkerList = new ArrayList(); +for (Worker w : workerList) + if ( w.getEmployed() ) employedWorkerList.add(w); +``` + +Collections can also be filtered without the iteration, by using the Apache [CollectionUtils](https://commons.apache.org/proper/commons-collections/javadocs/api-3.2.1/index.html?org/apache/commons/collections/CollectionUtils.html) and [Predicate](https://commons.apache.org/proper/commons-collections/javadocs/api-3.2.1/index.html?org/apache/commons/collections/Predicate.html) libraries: +```java +import org.apache.commons.collections.CollectionUtils; +``` +```java +import org.apache.commons.collections.Predicate; +``` + +and applying a JAS-mine filtering class implementing the `ICollectionFilter` interface, as follows: +```java +List employedWorkerList = new ArrayList(); +CollectionUtils.select( + workerList, filter, employedWorkerList +); +``` + +where the filter is implemented with the following *Closure:* +```java +new Predicate() { + public boolean evaluate(Object obj) { + Worker w = 
(Worker) obj; + return (w.getEmployed()); + } +} +``` + +**RMK:** At present, it is not possible to schedule an event for a filtered collection with an automatic evaluation of the filter at each scheduled time. That is, +```java +EventGroup eventGroup = new EventGroup(); +eventGroup.addCollectionEvent(CollectionUtils.select(workerList, new Predicate(){...}), + Agent.Processes.DoSomething); +``` + +filters the list at t=0 -when the schedule is built- based on the characteristics of the objects in the list at t=0. If the filter has to be reevaluated each time the call for the event is broadcast, this must be done as a separate process, as with +```java +eventGroup.addEvent(this, Processes.UpdateEmployedWorkerList); +eventGroup.addCollectionEvent(employedWorkerList, Agent.Processes.DoSomething); +``` + +Starting with Java 8 (which requires Eclipse version Luna or later), it is possible to simplify further by using a Stream. A Stream is a data structure that is computed on-demand. A Stream doesn't store data; it operates on the source data structure (collection or array) and produces pipelined data on which we can perform specific operations. As such, we can create a Stream from the list and filter it based on a condition: +```java +List employedWorkerList = workerList.stream().filter( + w -> w.getEmployed()).collect(Collectors.toList() +); +``` \ No newline at end of file diff --git a/documentation/wiki/jasmine-reference/enums.md b/documentation/wiki/jasmine-reference/enums.md new file mode 100644 index 000000000..7faf16270 --- /dev/null +++ b/documentation/wiki/jasmine-reference/enums.md @@ -0,0 +1,13 @@ +# Enums + +An **enum** is a special class that represents a group of constants (unchangeable variables). One way to think about enums for users familiar with, for example, Stata, is as categorical variables – each category that the variable can take must be specified in the enum. 
In general, enums are well suited for use with values that we know are not going to change. + +For example, to define a variable `Gender` that can take either `Male` or `Female` values, we define an enum `Gender` in the `model.enums` package: + +![Gender Enum Example](https://www.microsimulation.ac.uk/wp-content/uploads/2020/06/image.png) + +We can then, for example, specify a variable `gender` in `Person` class, which will only ever take one of the values specified in the enum: + +![Gender Variable Example](https://www.microsimulation.ac.uk/wp-content/uploads/2020/06/image-1.png) + +For a more detailed tutorial on enums we recommend following [W3Schools](https://www.w3schools.com/java/java_enums.asp) and [Oracle's tutorial](https://docs.oracle.com/javase/tutorial/java/javaOO/enum.html). \ No newline at end of file diff --git a/documentation/wiki/jasmine-reference/index.md b/documentation/wiki/jasmine-reference/index.md new file mode 100644 index 000000000..1c8ee3753 --- /dev/null +++ b/documentation/wiki/jasmine-reference/index.md @@ -0,0 +1,3 @@ +# JAS-mine Reference + +This section describes specific JAS-mine features and useful concepts. \ No newline at end of file diff --git a/documentation/wiki/jasmine-reference/links.md b/documentation/wiki/jasmine-reference/links.md new file mode 100644 index 000000000..6b9ae28fe --- /dev/null +++ b/documentation/wiki/jasmine-reference/links.md @@ -0,0 +1,9 @@ +# Links + +Links are connections between two different objects in the simulation. Examples are husband-to-wife (a **one-to-one** relationship, in most legal systems), parent-to-offspring (a **one-to-many** relationship, or a many-to-one if looked from the other side), and firms-to-workers (a many-to-many relationship, as a firm can employ multiple workers, and a worker can be employed at multiple firms at the same time). + +When an object has more than one link with objects belonging to the same entity (eg. 
a parent to his/her offspring, a firm to its employees, a worker to the firms he/she is working for), a collection is generally used to store the linked objects’ id (eg. a list of children, a list of employees, a list of firms). + +A problem then arises if one wants to keep record of all the links, as (i) the list size is not a priori defined, (ii) the list size can change over time. The solution envisaged in the JAS-mine architecture is to fully exploit the potentiality of the underlying relational database, and model the link as an object in itself, which will then be stored in a separate table with the identifiers of the two connected nodes and the time. + +The JAS-mine demo model [Applications](https://www.microsimulation.ac.uk/jas-mine/demo/job-applications) exemplifies this approach. \ No newline at end of file diff --git a/documentation/wiki/jasmine-reference/matching-library.md b/documentation/wiki/jasmine-reference/matching-library.md new file mode 100644 index 000000000..e0845a0ab --- /dev/null +++ b/documentation/wiki/jasmine-reference/matching-library.md @@ -0,0 +1,168 @@ +# The JAS-mine Matching Library + +JAS-mine has a specific library that performs matching between two collections of agents, based on some specific criterion. The matching methods are called from outside the agents to be matched, for instance by the Model. The simplest algorithm is a one-way matching procedure (the agents in one collection – say females – choose, while the agents in the other collection – say males – remain passive) implemented in the class `SimpleMatching`: + +```java +matching(collection1, filter1, comparator1, collection2, filter2, matchingScoreClosure, matchingClosure); +``` + +This is invoked as + +```java +SimpleMatching.getInstance().matching(...) +``` + +# 1. Method Arguments + +The method requires 7 arguments: + +1. **collection1**: the first collection (e.g. all individuals in the population); +2. **filter1**: a filter to be applied to the first collection (e.g. 
all females with the toCouple flag on in the Demo07 sample model); +3. **comparator1**: a comparator to sort the filtered collection, which determines the order that the agents in the filtered collection will be matched. +4. **collection2**: the second collection, which can be the same as collection1 (e.g. all individuals in the population) or a different one; the two collections do not need to have the same size; +5. **filter2**: a filter to be applied to the second collection (e.g. all males with the toCouple flag on in the Demo07 sample model); +6. **matchingScoreClosure**: a piece of code that assigns, for every element of the filtered collection1, a double value to each element of the filtered collection2, as a measure of the quality of the match between every pair; +7. **matchingClosure**: a piece of code that determines what to do upon matching. + +# 2. Understanding Closures + +The use of **closures**, which are relatively new to the Java language, allows a great simplification of the code. A closure is a function written by another function. Closures are so called because they enclose the environment of the parent function, and can access all variables and parameters in that function. This is useful because it allows us to have two levels of parameters. One level of parameters (the parent) controls how the function works. The other level (the child) does the work. While it is not required that the user knows about closures, it is interesting to understand why they are so useful. + +In the example, suppose that the females in the population are sorted according to some criterion, say beauty: the prettiest woman is the first to choose a partner, the second prettiest comes second, etc. The matchingScoreClosure sorts all possible mates according to some other criterion, say wealth. Hence, the prettiest woman gets the richest man, the second prettiest gets the second richest, etc. 
In such a case, a comparator would suffice to order the males in the population, as the ranking is the same irrespective of the female who is evaluating them. But suppose now that the attractiveness of a man depends on the age differential between himself and the potential partner: in such a case, the ranking is specific to each woman in the population. A simple comparator would still do the job, but the comparator should be able to access the identity of the woman who is making the evaluation as an argument, which requires a lot of not-so-straightforward coding. Closures allow to bypass this technical requirement – that has very little to do with modelling issues – because they can pass a functionality as an argument to another method; in other words, they treat functionality as method argument, or code as data. + +Technically, a closure is a function that refers to free variables in its lexical context. A free variable is an identifier (a name, the identity of the woman who is evaluating the men in the population, for instance) that has a definition outside the closure; it is not defined by the closure, but it is used by the closure. In other words, these free variables inside the closure have the same meaning they would have had outside the closure. + +## Example Implementation + +Closures in the *matching*() method are easier used than explained. An example is found in the *Demo07* sample model. 
The 7 arguments are: + +### collection1 +The whole population + +```java +persons +``` + +### filter1 +A subset of the female population + +```java +new FemaleToCoupleFilter() +``` + +### comparator1 +A comparator that assigns priority to the individual that has a lower difficulty in matching (this is determined by an individual's age in relation to the average) + +```java +new Comparator() { + @Override + public int compare(Person female1, Person female2) { + return (int) Math.signum((Math.abs(female1.getAge() - averageAge.getAverage()) - + Math.abs(female2.getAge() - averageAge.getAverage()))); + } +} +``` + +### collection2 +Same as collection1 + +```java +persons +``` + +### filter2 +A subset of the male population + +```java +new MaleToCoupleFilter() +``` + +### matchingScoreClosure +A closure that, given a specific female, computes for every male in the population a matching score + +```java +new MatchingScoreClosure() { + @Override + public Double getValue(Person female, Person male) { + return female.getMarriageScore(male); + } +} +``` + +### matchingClosure +A closure that creates a link between a specific female and a specific male, and sets up a new household + +```java +new MatchingClosure() { + + @Override + public void match(Person female, Person male) { + + female.marry(male); + male.marry(female); + } +} +``` + +# 3. 
Complete Method Example + +Hence, the whole method looks like: + +```java +SimpleMatching.getInstance().matching( + + // collection1: the whole population + persons, + + // filter1 + new FemaleToCoupleFilter(), + + // comparator1: a comparator that assigns priority to the individual that has a lower difficulty + // in matching (this is determined by an individual's age in relation to the average) + + new Comparator() { + + @Override + public int compare(Person female1, Person female2) { + + return (int) Math.signum( + Math.abs(female1.getAge() - averageAge.getAverage()) - + Math.abs(female2.getAge() - averageAge.getAverage())); + + } + }, + + // collection2: same as collection1 + persons, + + // filter2 + new MaleToCoupleFilter(), + + // matchingScoreClosure: a closure that, given a specific female, + // computes for every male in the population a matching score + + new MatchingScoreClosure() { + + @Override + public Double getValue(Person female, Person male) { + + return female.getMarriageScore(male); + + } + }, + + // matchingClosure: a closure that creates a link between a specific + // female and a specific male, and sets up a new household. + + new MatchingClosure() { + + @Override + public void match(Person female, Person male) { + + female.marry(male); + male.marry(female); + + } + } +); +``` diff --git a/documentation/wiki/jasmine-reference/querying-database.md b/documentation/wiki/jasmine-reference/querying-database.md new file mode 100644 index 000000000..e9ba921fd --- /dev/null +++ b/documentation/wiki/jasmine-reference/querying-database.md @@ -0,0 +1,72 @@ +# Querying the Database + +# 1. Querying the database at run-time + +The database can be queried at runtime to provide inputs for the simulation. The representation of the sample population is fully adherent to the standards used in IT systems to store entities and relations between entities. 
Consequently, population modelling can be performed according to standard strategies for modelling object classes and their persistence on database. In particular, ORM requires relationships between objects to be implicitly modelled. The ORM engine translates these relationships into foreign keys in the relational model. +```mermaid +erDiagram + FIRM ||--o{ WORKER : employs + + FIRM { + int firm_id PK + string name + string address + string industry + date founded_date + int employee_count + } + + WORKER { + int worker_id PK + int firm_id FK + string first_name + string last_name + date birth_date + string position + decimal salary + date hire_date + } +``` +For instance, in the Entity/Relationship (E/R) diagram above, only the firm_id field containing the primary key of the firm table requires specification. In case all records concerning workers related to one firm need to be obtained, without using ORM a specific SQL query should be created, then run, to extract and insert data in an object intended to represent the connected entity. When using ORM the persistence engine is simply requested to get the object of the *Firm* class corresponding to the desired identifier and the object's relational graph is loaded automatically, including related workers (objects of the *Worker* class). For example using the notation `worker.getFirm().getName()` will read from the database the name of the company where a worker is employed without the need of defining any SQL query, not differently from what one would do for reading the same information from the *Firm* object itself, accessed through a specific pointer in the Worker class. + +Since Java 5, annotations have been available to represent attributes/adjectives assigned to specific parts of code such as classes or properties. Annotations decorate the elements they are associated to, in the sense that they attribute meanings that can be used to add collateral information to objects. 
+ +Annotations make the definition and the use of coefficients tables more powerful and flexible. For example, a table is created to represent and manage the mapping of two characteristics –minimum retirement age and expected residual lifetime– for each sex-age group of the simulated population. The table contains four fields: age, sex, retirementAge and residualLifetime. These fields have in fact different semantics: the first two correspond to search keys in a key-value dictionary, while the last two represent specific values. + +ORM allows the construction of a Java class, for example called CoefficientA, that contains the four properties corresponding to the table fields; their values can then be read by the ORM engine. In order to populate the dictionary automatically the properties of the CoefficientA class can be "decorated" using the JAS-mine ad-hoc CoefficientMapping annotation. + +```java +@Entity +@CoefficientMapping(keys={"age", "sex"},values={"retirementAge", "residualLifetime"}) +public class CoefficientA { + private Integer age; + private Sex sex; + private Integer retirementAge; + private Double residualLifetime; + […] +} +``` + +The Entity annotation informs the ORM engine that the CoefficientA class corresponds to a table in the database which bears the same name as the class and contains the fields corresponding to the object's properties. 
A JAS-mine library will then request the ORM engine to read the data contained in the table and to include them in a key-value structure that can be easily queried using an instruction like the following: + +```java +MultiKeyCoefficientMap coefficientA = DatabaseUtils.loadCoefficientMap(CoefficientA.class); +int retirementAge = coefficientA.get(30, Sex.Female, "retirementAge"); +double residualLifetime = coefficientA.get(30, Sex.Female, "residualLifetime"); +``` + +where the first two parameters of the get function are the two keys and the last one ("retirementAge" or "residualLifetime") is the name of the value variable to retrieve. + +This method for accessing parameter tables may appear convoluted and cumbersome. The same result can be achieved more rapidly by placing the map values in an Excel sheet. + +The parameters are then loaded using a specific JAS-mine interface: + +```java +MultiKeyCoefficientMap coefficientA = ExcelAssistant.loadCoefficientMap("input/coeffA.xls", "Sheet1", 2, 2); +``` + +Only the number of key columns and "value" columns need to be specified. Clearly this process is much easier but it does not allow for significant parameter typification (since Excel is not as rigid as a database). Moreover, it is more error prone as accidental modifications to the Excel sheet might lead to incorrect parameter loading. + +# 2. Inspecting the database before or after a simulation has completed + +The user may wish to access the input database before a simulation has been executed, or to view the output database afterwards. A simple way to inspect the database is via the 'Database explorer', which can be opened via the 'Tools' tab in the menu of the JAS-mine Graphical User Interface (GUI). Another slightly more complicated way involves downloading and installing the H2 Console and specifying the full location of the database to be inspected. Both methods open a web browser interface that allows the data from the database to be accessed via SQL-style commands.
\ No newline at end of file diff --git a/documentation/wiki/jasmine-reference/regression-library.md b/documentation/wiki/jasmine-reference/regression-library.md new file mode 100644 index 000000000..295fe71d8 --- /dev/null +++ b/documentation/wiki/jasmine-reference/regression-library.md @@ -0,0 +1,315 @@ +# The JAS-mine Regression Library + +The JAS-mine Core libraries support several types of regressions, including: + +* Linear Regression +* Logistic (Logit) Regression +* Probit Regression +* Multinomial Logistic (Logit) Regression +* Multinomial Probit Regression + +The relevant classes can be found in the microsim.statistics.regression package of the core JAS-mine libraries. The regression objects store the regression coefficients (the 'betas') of a regression. When they are passed another object such as an agent that holds the corresponding regressor or 'covariate' values (the 'x's in a regression), these objects can return a variety of values corresponding to linear regression 'scores', logit / probit 'probabilities' or random outcome from a binary event (i.e. whether an outcome takes place). Multinomial logit or probit regression objects return the random outcome from a finite set of possible outcomes. + +# 1. Basic Regression Objects: linear, logit and probit + +## 1.1 Creation of the regression objects + +The [Demo07 demo](https://www.microsimulation.ac.uk/jas-mine/demo/demo07) example uses the LinearRegression and LogitRegression classes, and also the extra functionality provided by static methods in the RegressionUtils class. 
+ +For example, in the Parameters class, the regression objects are created, as shown below: + +```java +// regression coefficients +MultiKeyCoefficientMap coeffMarriageFit = ExcelAssistant.loadCoefficientMap("input/reg_marriage.xls", "Sheet1", 1, 1); + +MultiKeyCoefficientMap coeffDivorce = ExcelAssistant.loadCoefficientMap("input/reg_divorce.xls", "Sheet1", 1, 1); + +MultiKeyCoefficientMap coeffInWork = ExcelAssistant.loadCoefficientMap("input/reg_inwork.xls", "Sheet1", 3, 1); + +// definition of regression models +LinearRegression regMarriageFit = new LinearRegression(coeffMarriageFit); +LogitRegression regDivorce = new LogitRegression(coeffDivorce); +LogitRegression regInWork = new LogitRegression(coeffInWork); +``` + +The first few lines take regression coefficients from Microsoft Excel .xls files that are stored in the project's input folder, and the *ExcelAssistant.loadCoefficientMap()* method converts them into MultiKeyCoefficientMap types. The *MultiKeyCoefficientMap* type, found in the microsim.data package of the core JAS-mine libraries, is a class that extends the Apache Commons MultiKeyMap type, allowing users to retrieve regression coefficients depending on multiple keys that correspond to the regressor (covariate) and the attributes of the agent. The last two arguments in the *loadCoefficientMap()* method refer to the number of key columns and the number of value columns respectively. All *MultiKeyCoefficientMaps* have one value column – the regression coefficients. The "Sheet1" argument refers to the name of the relevant Excel worksheet; an alternative approach could be to place all regression coefficients in the same .xls file called 'reg_coefficients', but store coefficients for different regressions in different worksheets, for example named 'marriage', 'divorce', 'inWork' etc. + +The last three lines of the code box create regression objects whose argument is the *MultiKeyCoefficientMap* of regression coefficients. 
In particular, regMarriageFit is a *LinearRegression* object, whilst regDivorce and regInWork are *LogitRegression* objects. *ProbitRegression* objects are created in a similar way. + +An example of the information used in the reg_inwork.xls file is shown below; note that it has three key columns, hence the third argument of the *loadCoefficientMap()* method used to create the *coeffInWork* object is '3'. As you can see from the data below, the first column lists the regressor (or 'covariate') names, and the second and third columns specify the conditions of the agent that need to be checked when applying the regression object – different regression coefficients apply to different types of agent (in this case, the relevant agent attributes are their gender and their work status; 'gender' and 'workState' are the names of the agent's relevant fields). + +Note that it is important, when using .xls files to hold regression coefficients, to follow the convention of using the header 'REGRESSOR' for the regressors column and 'COEFFICIENT' for the coefficients column. This is because the JAS-mine methods check *MultiKeyCoefficientMaps* for these column headers when performing calculations for regression functionality such as bootstrapping (using the *RegressionUtils.bootstrap()* method). Indeed, REGRESSOR and COEFFICIENT are special enum constants of the Enum *RegressionColumnNames* class in the microsim.statistics.regression package. 
+ +| REGRESSOR | gender | workState | COEFFICIENT | +|-----------|--------|-----------|-------------| +| age | Male | Employed | -0.196599 | +| ageSq | Male | Employed | 0.0086552 | +| ageCub | Male | Employed | -0.000988 | +| isMarried | Male | Employed | 0.1892796 | +| workIntercept | Male | Employed | 3.554612 | +| age | Male | NotEmployed | 0.9780908 | +| ageSq | Male | NotEmployed | -0.0261765 | +| ageCub | Male | NotEmployed | 0.000199 | +| workIntercept | Male | NotEmployed | -12.39108 | +| age | Female | Employed | -0.2740483 | +| ageSq | Female | Employed | 0.0109883 | +| ageCub | Female | Employed | -0.0001159 | +| isMarried | Female | Employed | -0.0906834 | +| workIntercept | Female | Employed | 3.648706 | +| age | Female | NotEmployed | 0.8217638 | +| ageSq | Female | NotEmployed | -0.0219761 | +| ageCub | Female | NotEmployed | 0.000166 | +| isMarried | Female | NotEmployed | -0.5590975 | +| workIntercept | Female | NotEmployed | -10.48043 | + +## 1.2 How to use the Linear Regression objects + +Linear regression objects return the score of the linear regression, i.e. the inner product of the regression coefficients with the regressors (the sum over i of beta_i * x_i). This can be invoked as in the following example, where the Person object called 'ross' provides the regressor values (the 'x's) to the LinearRegression object, which holds the regression coefficients (the betas). + +```java +double marriageScore = Parameters.getRegMarriageFit().getScore(ross, Person.Regressors.class); +``` + +This case uses the *getScore()* method with signature: + +```java +public <T extends Enum<T>> double getScore(IDoubleSource iDblSrc, Class<T> enumType) +``` + +This is because the Person class implements the *IDoubleSource* interface – this is how the *Person* class retrieves the correct regressor values, using the *Person.Regressors* inner enum class. Note that these methods require the specification of the *Person.Regressors* inner enum class.
As seen in the *Person* class of the Demo07 demo example model: + +```java +// --------------------------------------------------------------------- +// implements IDoubleSource for use with Regression classes +// --------------------------------------------------------------------- + + +public enum Regressors { + + //For in work regression + age, + ageSq, + ageCub, + isMarried, + workIntercept; + +} + + +public double getDoubleValue(Enum variableID) { + + switch ((Regressors) variableID) { + + //For work regression + case age: + return (double) age; + case ageSq: + return (double) age * age; + case ageCub: + return (double) age * age * age; + case isMarried: + return civilState.equals(CivilState.Married)? 1. : 0.; + case workIntercept: + return 1.; //The constant intercept, so regression coefficient is multiplied by 1 + + default: + throw new IllegalArgumentException("Unsupported regressor " + variableID.name() + " in Person.getDoubleValue"); + + } + +} +``` + +There are other getScore() methods that also return the linear regression score but use different input arguments – see the [Javadocs](https://www.microsimulation.ac.uk/jas-mine/resources/api/) of JAS-mine-core's microsim.statistics.regression package. + +## 1.3 How to use the Logit and Probit regression objects + +The logit and probit regression objects return the logit or probit transforms of the linear regression score, respectively. As these transforms produce numbers bounded in the interval [0, 1], they are often interpreted as 'probabilities' that an event occurs or not (an event with a binary outcome). Hence logit and probit regressions are used to model the outcome of binary events. + +The methods available in the *LogitRegression* and *ProbitRegression* classes include *getProbability()*, which returns the 'probability' (i.e. the logit or probit transform of the linear regression score), and *event()*, which returns a boolean representing whether the event outcome is true or false (i.e.
whether the outcome is deemed to occur or not). The *event()* method generates a random boolean whose value is true with probability equal to the value returned by the *getProbability()* method; conversely the boolean is false with probability equal to 1-*getProbability()*. + +In the Demo07 example model, we can see how the logit 'probability' is used to calculate the probability that *Person* object 'ross' is in work: + +```java +double workProb = Parameters.getRegInWork().getProbability(ross, Person.Regressors.class, ross, Person.RegressionKeys.class); +``` + +We could directly calculate the random boolean variable to determine whether *Person* object 'ross' is in work as follows: + +```java +boolean inWork = Parameters.getRegInWork().event(ross, Person.Regressors.class, ross, Person.RegressionKeys.class); +``` + +Note that these methods require the specification of the Person.RegressionKeys inner enum class. As seen in the Person class of the Demo07 demo example model: + +```java +// --------------------------------------------------------------------- +// implements IObjectSource for use with Regression classes +// --------------------------------------------------------------------- + + +public enum RegressionKeys { + + gender, + workState, + +} + + +public Object getObjectValue(Enum variableID) { + + switch ((RegressionKeys) variableID) { + + //For marriage regression + case gender: + return gender; + case workState: + return workState; + default: + throw new IllegalArgumentException("Unsupported regressor " + variableID.name()); + + } + +} +``` + +Just like the *LinearRegression* class, there are several versions of the *getProbability()* and *event()* methods that cater for different input arguments: + +```java +boolean event(IDoubleSource, Class); +boolean event(IDoubleSource, Class, IObjectSource, Class); +boolean event(Map); +boolean event(Object); + +double getProbability(IDoubleSource, Class); +double getProbability(IDoubleSource, Class, IObjectSource,
Class); +double getProbability(Map); +double getProbability(Object); +``` + +The different versions employ the corresponding methods from the *LinearRegression* class to calculate the regression score, so their usage follows the same conventions outlined in the Javadocs extract in section 1.2. + +# 2. Multinomial logit and probit regression objects + +Multinomial logit and probit regressions are used to determine the outcome of random events, where the outcome is taken from a finite set of possible outcomes. Respectively, they are the multi-outcome analogues of the logit and probit regressions, which are only suitable for modelling binary outcomes. In the case of N possible outcomes, a multinomial regression works by comparing the logistic or probit transform of the linear regression scores for N-1 outcomes, with the Nth outcome deemed to have a score of 0. From this, it creates relative probabilities of outcomes, which can then be sampled to determine which of the N outcomes occurs. + +The following section discusses *MultiProbitRegression* objects; however, *MultiLogitRegression* objects are used in the same way, the only difference being that the logistic transform is used to map the linear regression score to a probability, instead of the probit transform. + +## 2.1 Creation of the regression objects + +The creation of *MultiProbitRegression* objects is slightly more involved as the *MultiProbitRegression* class accepts a HashMap of *MultiKeyCoefficientMaps* (each *MultiKeyCoefficientMap* stores regression coefficients corresponding to a unique outcome), so we need to create the HashMap first. + +If we want to create a *MultiProbitRegression* object to model a random outcome that could have three possible states, it is necessary to supply two *MultiKeyCoefficientMaps* representing two sets of coefficients to model two of the three possible states, whilst the third outcome is considered the 'default' mode.
+ +In the example below, we model the education level of agents in a simulation by specifying the regression coefficients for low and high education levels, with medium education as the default outcome. After first creating the *MultiKeyCoefficientMaps* of the two sets of regression coefficients, possibly taking these from Microsoft Excel .xls files as described in section 1.1, we then create a HashMap whose keys map an outcome (an *Education* enum constant representing the education level) to the corresponding regression coefficients. The *MultiProbitRegression* object is then created in the following way: + +```java +// Regression Coefficients +MultiKeyCoefficientMap coeffEducationLow = ExcelAssistant.loadCoefficientMap ("input/reg_education.xls", "Low", 1, 1); + +MultiKeyCoefficientMap coeffEducationHigh = ExcelAssistant.loadCoefficientMap ("input/reg_education.xls", "High", 1, 1); + +// Create HashMap to hold the regression coefficient MultiKeyCoefficientMaps +HashMap educationCoefficientMap = new HashMap(); + +educationCoefficientMap.put(Education.Low, coeffEducationLow); +educationCoefficientMap.put(Education.High, coeffEducationHigh); + +// Create the MultiProbitRegression object +MultiProbitRegression regEducationLevel = new MultiProbitRegression (educationCoefficientMap); +``` + +## 2.2 How to use the regression objects + +The outcome of an event modelled by the *MultiProbitRegression* object is determined in the following way for a *Person* object 'ross': + +```java +Education education = Parameters.getRegEducationLevel().eventType(ross, Person.Regressors.class, Education.class); +``` + +Note that the *Person* class implements the *IDoubleSource* interface, which is how the values of the regressors ('regression covariates') are passed to the *MultiProbitRegression* object.
An example of how this might be implemented in the Person class is in section 1.2, although the cases in the *getDoubleValue()* method must correspond to the regressors (covariates) used in the regression. The last argument specifies the return type T of the *eventType()* method and should always match the type on the left hand side. + +Similarly to Linear, Logit and Probit regression classes, there are several versions of the *eventType()* method depending on the input arguments: + +```java +T eventType(IDoubleSource, Class, Class); +T eventType(Map); +T eventType(Object); +``` + +The different versions employ the corresponding methods from the *LinearRegression* class to calculate the regression scores of each outcome, which are subsequently used to calculate the probit transforms of each outcome, so their usage follows the same conventions outlined in the Javadocs referenced in section 1.2. + +# 3. Bootstrap methods to address parameter uncertainty + +The sources of uncertainty within a simulation model are discussed in the [Uncertainty analysis](https://www.microsimulation.ac.uk/jas-mine/resources/focus/uncertainty-analysis/) page. In order to address the issue of parameter uncertainty, JAS-mine provides methods to 'bootstrap' the regression coefficients of the model easily. Bootstrapping involves sampling the set of regression coefficients of a regression object from a multivariate normal distribution whose vector of expected values (means) are the set of regression coefficients estimated from the data, with the covariance matrix derived from the statistical error of the estimates. + +The new sample of ('bootstrapped') regression coefficients can then be used in a simulation run and the output recorded. The process can then be repeated by sampling a new set of bootstrapped regression coefficients to be used in another simulation run.
By repeating this many times, an understanding of how parameter uncertainty affects the dynamics of the model can be developed, and estimates of the uncertainty of the model evolution can be quantified and visualised as in the Figure of the [Uncertainty analysis](https://www.microsimulation.ac.uk/jas-mine/resources/focus/uncertainty-analysis/) page. The [MultiRun class](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/the-multirun-class/) can be used to execute the repeated run of simulations, as described in the tutorial [How to run a simulation many times (design of experiments)](https://www.microsimulation.ac.uk/jas-mine/resources/tutorials/run-a-simulation-many-times/). + +## 3.1 Linear or Binary choice (Logit / Probit) bootstrapping + +There are two methods to perform bootstrapping on a single set of regression coefficients, corresponding to a Linear, Logit or Probit regression class. Which method to use depends on whether you want to submit the regression coefficients and covariance matrix as separate MultiKeyCoefficientMaps to the bootstrap method, or whether you have a single MultiKeyCoefficientMap containing the numbers for both coefficients and covariance matrix.
+ +The example below demonstrates how to bootstrap regression coefficients, where the covariance matrix is passed to the method as a separate argument: + +```java +// Create MultiKeyCoefficientMaps from Excel spreadsheet containing separate worksheets +MultiKeyCoefficientMap coeffParticipationMales = ExcelAssistant.loadCoefficientMap ("input/reg_participationMales.xls","RegressionCoefficients", 1, 1); + + +MultiKeyCoefficientMap covarianceParticipationMales = ExcelAssistant.loadCoefficientMap ("input/reg_participationMales.xls", "CovarianceMatrix", 1, 10); + +// Call the bootstrap method for separate coefficient and covariance matrix MultiKeyCoefficientMaps +MultiKeyCoefficientMap newCoeffParticipationMales = RegressionUtils.bootstrap(coeffParticipationMales, covarianceParticipationMales); + +// Create Regression object from the new bootstrapped regression coefficients +ProbitRegression regParticipationMales = new ProbitRegression(newCoeffParticipationMales); +``` + +The resulting newCoeffParticipationMales object contains the new regression coefficients to be passed to the ProbitRegression object. Note that the covariance matrix is a 10 by 10 matrix, which can be seen by the 10 values columns specified in the loadCoefficientMap function call to create the covarianceParticipationMales object. + +The alternative bootstrap method only takes one MultiKeyCoefficientMap argument and relies upon the naming of the keys and values of the map. Considering the map as having been created from an Excel spreadsheet, the key column on the left hand side of the Excel worksheet must be titled 'REGRESSOR' and contain the names of all the covariates. To the right of the 'REGRESSOR' column, there must be a column containing the regression coefficients called 'COEFFICIENT', and a separate column named after each covariate that holds the covariance data between the row covariate and the column covariate.
This means that when loading the data from the Excel worksheet using the .loadCoefficientMap, the number of key columns will be 1, whilst the number of values columns will be 11 (one for the regression coefficients, and 10 for the covariance matrix data), as can be seen in the example code below: + +```java +// Create MultiKeyCoefficientMap containing both regression coefficients and covariance matrix +MultiKeyCoefficientMap coeffsAndCovariance_ParticipationMales = ExcelAssistant.loadCoefficientMap ("input/reg_participationMales.xls", "coeffs_And_Covariance", 1, 11); + +// Call the bootstrap method for combined regression coefficient and covariance matrix +MultiKeyCoefficientMap newCoeffParticipationMales = RegressionUtils.bootstrap(coeffsAndCovariance_ParticipationMales); + +// Create Regression object from the new bootstrapped regression coefficients +ProbitRegression regParticipationMales = new ProbitRegression(newCoeffParticipationMales); +``` + +The probit regression object is created in the same way as before. + +## 3.2 Multinomial bootstrapping (for Multinomial Logit / Probit Regressions) + +The bootstrap method for the case of multinomial regression is called 'bootstrapMultinomialRegression', and the method returns an object that is required by the constructor methods of the MultiLogitRegression or MultiProbitRegression classes in order to create the regression objects. As discussed in section 2.1, this object is a Map whose keys are enum constants representing the outcome of the multinomial regression, and whose values are MultiKeyCoefficientMaps storing the set of regression coefficients for the particular outcome key. The bootstrapMultinomialRegression method bootstraps the sets of regression coefficients in each MultiKeyCoefficientMap. 
+ +There are two input arguments to the bootstrapMultinomialRegression method: 1) the original outcome-coefficients Map that would normally be used to directly construct a multinomial regression object, 2) a MultiKeyCoefficientMap storing the covariance matrix data, combined for all outcomes. Note that in this case, the covariance matrix must contain all the cross-variance terms for all outcomes and all covariates – to repeat, only one covariance matrix is used and it covers all outcomes of the multinomial regression. This means that the (square) covariance matrix has dimensions of the size (N-1) times the number of covariates per outcome, or N times the number of covariates per outcome in the case where the base ('default') outcome is specified. (Note, the same covariates should appear for each outcome). + +The use of the bootstrapMultinomialRegression method is demonstrated below: + +```java +// Load Low Education Outcome regression coefficients from Excel spreadsheet +MultiKeyCoefficientMap coeffEducationLow = ExcelAssistant.loadCoefficientMap ("input/reg_education.xls", "Low", 1, 1); +// Load High Education Outcome regression coefficients +MultiKeyCoefficientMap coeffEducationHigh = ExcelAssistant.loadCoefficientMap ("input/reg_education.xls", "High", 1, 1); +// Create Outcome-Coefficients Map +Map coeffEducationLowHighMap = new HashMap(); + +coeffEducationLowHighMap.put(Education.Low, coeffEducationLow); +coeffEducationLowHighMap.put(Education.High, coeffEducationHigh); + +// Load Covariance Matrix from Excel spreadsheet (combined data for Low and High Education covariances) +// There are 2 outcomes (Low and High Education), and the same 8 covariates for each outcome, hence +// the number of values columns in the Excel worksheet is 16 +MultiKeyCoefficientMap educationLowHighCombinedCovariance = ExcelAssistant.loadCoefficientMap ("input/reg_education.xls", "Covariance", 1, 16); + +// Bootstrap the regression coefficients for all outcomes +Map
newCoeffEducationLowHighMap = RegressionUtils.bootstrapMultinomialRegression(coeffEducationLowHighMap, educationLowHighCombinedCovariance, Education.class); + +// Create regression object from the new bootstrapped regression coefficients in the outcome- +// coefficient map +MultiProbitRegression regEducationLevel = new MultiProbitRegression (newCoeffEducationLowHighMap); +``` + +Note that the resulting Map returned by the bootstrapMultinomialRegression method is then used as the input argument to construct the MultiProbitRegression object. diff --git a/documentation/wiki/jasmine-reference/saving-outputs.md b/documentation/wiki/jasmine-reference/saving-outputs.md new file mode 100644 index 000000000..3e316aa94 --- /dev/null +++ b/documentation/wiki/jasmine-reference/saving-outputs.md @@ -0,0 +1,172 @@ +# Saving Outputs + +# 1. Persistence + +Persistence is achieved by updating a table in the relational database corresponding to a class in the simulation model. Classes to be persisted must be annotated as `@Entity` classes: + +```java +@Entity +public class Person { + […] +} + +``` + +With such a simple operation, a table with the same name as the class is automatically added to the output, and the class is linked to this table. When the class is dumped by the Collector, all properties which are not annotated as `@Transient`, plus all the variables not defined as properties but implicitly defined by getters, are persisted in the database. + +Note that `@Entity` classes **MUST** have an empty constructor. If a superclass is involved, the call to the superclass constructor should look like the example below: + +```java +protected Agent() { + super(null); +} + +``` + +# 2. Keys + +To provide a unique identifier for the table entries, Entity classes must specify a `PanelEntityKey` (annotated as `@Id`), which is a three-dimensional object which identifies the agent id, the simulation time and the simulation run.
These three keys uniquely identify each record: + +```java +@Id +private PanelEntityKey key; + +``` + +PanelEntityKeys **MUST be annotated as @Id**, though the `PanelEntityKey` object can be given any name, and it is suggested that they are not called *id* (*key* is a better name). This is because `PanelEntityKey` objects contain three objects: + +* An **id** field of type `Long` representing the agent’s id. +* A **simulation_run** field storing the number of the simulation run (also of type `Long`), which is useful when running many simulations in sequence. +* A **simulation_time** field (of type `Double`). + +The fields of the `PanelEntityKey` are thus used to uniquely identify panel entries in the database tables. + +This is why to access the agent id the method `getId()` has to be invoked: if the `PanelEntityKey` is called *id*, this becomes `id.getId()`, which is confusing. If the agent id has to be accessed from other agents (for instance, to perform identity checks), the following `getAgentId()` method should be implemented: + +```java +@Entity +public class Agent { + @Transient + private static long idCounter = 1000000; + + @Id + private PanelEntityKey key = new PanelEntityKey(idCounter++); + + public Long getAgentId() { + return key.getId(); + } +} + +``` + +And then used as `agent.getAgentId()`. + +With the JAS-mine persistence engine, (pointers to) objects cannot be persisted (this has to do with the fact that the `PanelEntityKey` is a multiple key).
Therefore, the agent’s `PanelEntityKey` should be included as an additional variable and persisted, while the (pointer to the) object should be annotated as `@Transient`: + +```java +@Entity +public class Application { + + @Id + private PanelEntityKey key; + + @Transient + private Worker worker; + + @Column(name="worker_id") + private Long workerId; +} + +``` + +The engine expects that the field names in the tables are the same as the property names in the Java class, except when a different name is specified as in: + +```java +@Column(name="dur_in_couple") +private Integer durationInCouple; + +``` + +Enumerations can be interpreted both as a string and as ordinal values (0 for the first enum, 1 for the second, etc.), depending on how they are annotated: + +```java +@Enumerated(EnumType.STRING) +private WorkState workState; + +``` + + +# 3. The `DataExport` class + +There are two ways of storing output data from the simulation runs, either to the database or to .csv files. Exporting to .csv is quicker than persisting to the database, so may be preferable when running simulations in time-constrained situations. + +In order to facilitate the exporting of data to .csv files and / or the database, a `DataExport` instance can be created in the `Collector` class for each object or collection of objects whose fields are to be recorded as in the code below. The choice of whether to export to the database and / or the .csv files can be controlled by two boolean fields `exportToDatabase` and `exportToCSV` respectively, which are passed to the constructor of the `DataExport` objects and can be set from the GUI (because they have the `@GUIparameter` annotation, which replaces the deprecated `@ModelParameter` annotation). 
+ +```java +@GUIparameter(description = "Toggle to export snapshot to .csv files") +boolean exportToCSV = true; //If true, data will be recorded to .csv files in the output directory + +@GUIparameter(description = "Toggle to export snapshot to output database") +boolean exportToDatabase = true; //If true, data will be recorded in the output database in the // output directory + +@GUIparameter(description = "Set the time at which to start exporting snapshots to the database and/or .csv files") +Double timeToStartDataExport = 0.; + +@GUIparameter(description = "Set the time between snapshots to be exported to the database and/or .csv files") +Double timestepsBetweenDataDumps = 1.; + +// collectionOfAgents is a Java Collection of agents e.g. an ArrayList, LinkedList or Set containing +// instances of the Agent.class +DataExport populationOutput = new DataExport(collectionOfAgents, exportToDatabase, exportToCSV); + +// agent is an instance of the Agent.class and has PanelEntityKey with id = 123 +DataExport agentOutput = new DataExport(agent, exportToDatabase, exportToCSV); + +``` + +# 4. Export to csv + +When executed with the `exportToCSV` Boolean set to true, separate .csv files will be created corresponding to the `populationOutput` and `agentOutput` objects. The name of the .csv files will match the name of the class of object or entries of the collection of objects which were passed to the `DataExport` constructor. In the case above for instance, the `collectionOfAgents` is a Java Collection such as a list or set whose entries are of the `Agent` class, so the corresponding Agent.csv file will be created. On the other hand, the agent object is a single instance of the `Agent` class (not a collection), so a file named Agent123.csv will be created, with the suffix '123' matching the agent's id number in its `PanelEntityKey` instance.
+ +When created, the .csv files will contain a first (header) line with the comma-separated names of all the fields of the underlying class to be recorded. These include numerical values, strings, Booleans and enum constants, and both private fields and those inherited from the superclass are recorded. References to objects, however, are not exported, although the internal fields of the PanelEntityKey associated with the object whose data is being recorded will be exported. + +In order to export the objects' data to either the .csv files or the database, the `export()` method must be invoked on the `DataExport` instances. This can be placed in the event schedule, so that the objects' data can be recorded at regular times in the future thus providing a snapshot of the simulation run, or be invoked at any time and by any object in the simulation: + +```java +public void buildSchedule() { + // Dump info from year 'timeToStartDataExport' onwards, with 'timestepsBetweenDataDumps' + // specifying the period between data dumps thereafter + getEngine().getEventList().scheduleRepeat(new SingleTargetEvent(this, Processes.DumpInfo), timeToStartDataExport, Order.AFTER_ALL.getOrdering(), timestepsBetweenDataDumps); + + // Dump data at the (scheduled) end of the simulation + getEngine().getEventList().schedule(new SingleTargetEvent(this, Processes.DumpInfo), endYear(), Order.AFTER_ALL.getOrdering(), 0.); +} + +//////////////////////////////////////////////////////////// +// Event Listener +//////////////////////////////////////////////////////////// +public enum Processes { + DumpInfo, +} + +public void onEvent(Enum type) { + switch ((Processes) type) { + case DumpInfo: + populationOutput.export(); + agentOutput.export(); + break; + } +} + +``` + +When the `exportToCSV` boolean is set to true, the `.export()` invocation will dump comma-separated data to the .csv files. 
Again, the data included is either numerical, strings, Booleans or enum constants, and includes private and inherited fields belonging to the object or its superclasses. In the case of the Agent.csv, one line will be added for each of the agent instances contained in the `collectionOfAgents` object, with each line referenced by values of the `PanelEntityKey`:- the simulation run number, the simulation time and the agent's id. In the case of Agent123.csv, a single line will be added containing the comma-separated data of the fields of the agent whose id is 123. + +# 5. Export to database + +When the `exportToDatabase` Boolean is set to true, the `DatabaseUtils.snap()` method will be invoked in the Collector, and JAS-mine's database functionality will kick in to export the data to the appropriate tables in the output database: + +```java +DatabaseUtils.snap( ( (PersonsModel) getManager()).getPersons() ); + +``` \ No newline at end of file diff --git a/documentation/wiki/jasmine-reference/statistical-package.md b/documentation/wiki/jasmine-reference/statistical-package.md new file mode 100644 index 000000000..63ad85e8d --- /dev/null +++ b/documentation/wiki/jasmine-reference/statistical-package.md @@ -0,0 +1,205 @@ +# The JAS-mine Statistical Package + +The statistics package is the built-in library of JAS-mine specifically designed to collect data in a simulation context. Since data sets collected from simulations are frequently updated and sometimes data structures change at runtime, the code is optimized to reduce memory occupancy and CPU time consumption. + +The present guide shows step by step the package features and their use. + +The package structure is composed of three sections: + +1. the **statistics** package contains the main interfaces and classes; +2. the **statistics.reflectors** package contains classes that retrieve data from common java objects; +3. the **statistics.functions** package contains the functions that compute statistics on data sets.
The statistics computing algorithms are mainly based on the `cern.jet.stat` package. + +# 1. How JAS-mine retrieves data from objects + +In order to compute statistics, a statistical object must be able to dynamically collect data from simulation objects. This represents a problem, since the statistical library classes do not know the structure of the target objects (designed by users) and so they cannot access their internal data using instructions like `myObject.getDatum()`. + +The easiest solution to solve this problem is the use of reflectors, which are provided by the reflectors package. These classes use Java Reflection to inspect dynamically the target objects' structure and data. + +Let's consider an example. An agent represented by the class `MyAgent` contains two variables, an integer called `age` and a double called `income`, as described by the following code: +```java +public class MyAgent { + + int age; + double income; + +} +``` + +Suppose that the user needs to create a series containing readings from the variable `income` for this agent. A typical instruction would be: +```java +MyAgent myAgent = new MyAgent(); +Series.Double seriesIncome = new Series.Double(myAgent, "income", false); +``` + +The last argument, `false`, signals that the value is not obtained from a method but must be retrieved using reflection. The constructor of the `Series.Double` class then automatically creates a `DoubleInvoker` object that reads the `income` variable within an instance of the `MyAgent` class. This way, every time the series is updated (with the `updateSource()` method, see below), the current value of the agent's income is appended to the `seriesIncome` internal data array. + +The reflection mechanism is very simple and elegant but, unfortunately, very inefficient, since it is about 20 times slower than a native direct access! So, in order to increase the speed, we need to access objects natively.
+ +JAS-mine defines a method for direct access, based on the `I*Source` and `I*ArraySource` interfaces, where the \* corresponds to the type of data to be provided: + +| **Single value output** | **Multiple value output (array)** | +| --- | --- | +| [IDoubleSource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/IDoubleSource.html) | [IDoubleArraySource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/IDoubleArraySource.html) | +| [IFloatSource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/IFloatSource.html) | [IFloatArraySource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/IFloatArraySource.html) | +| [ILongSource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/ILongSource.html) | [ILongArraySource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/ILongArraySource.html) | +| [IIntSource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/IIntSource.html) | [IIntArraySource](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/IIntArraySource.html) | + +Each object containing interesting data to be collected should therefore implement one or more of these interfaces, according to the data type. 
+ +In order to use these interfaces to natively access data inside the `MyAgent` class, the code has to be modified as follows: +```java +public class MyAgent implements IDoubleSource { + + public enum Variables { + Income; + } + int age; + double income; + + public double getDblValue(Enum variable) { + + if (variable.equals(Variables.Income)) + return income; + else + throw new IllegalArgumentException("Unsupported variable"); + + } +} +``` + +The series object previously defined can be now created using the following instructions: +```java +MyAgent myAgent = new MyAgent(); +Series.Double seriesIncome = new Series.Double(myAgent, MyAgent.Variables.Income); +``` + +This way, the series object will now access the target object's variables through its `IDoubleSource` interface, simply by passing to its `getDblValue` method the right enum (`MyAgent.Variables.Income`). + +Although boring, this mechanism is more efficient than the previous one, and it is recommended for 'large' (or 'long') simulations. However, the choice between using the reflection or the native access is left to the user. + + +# 2. Series and time series + +A `Series` is a time memory data collector. It requires a single source and, at each update, appends a new reading from the object to the list of stored values. If the source object implements the `I*Source` interface, data are read directly; otherwise they are collected through a type specific reflector (`*Invoker`). + +The `Series` class provides four implementations to support natively the main Java data types. The available implementations are: + +* `Series.Double`, which implements the `IDoubleArraySource` interface; +* `Series.Float`, which implements the `IFloatArraySource` interface; +* `Series.Integer`, which implements the `IIntArraySource` interface; +* `Series.Long`, which implements the `ILongArraySource` interface. + +For instance, a series reading long values must be created using the `Series.Long` constructor.
+ +Each of the four classes implements a specific `I*ArraySource` interface, meaning that the series is able to return the data array of the specific data type, for subsequent use by another statistical object (see below the encapsulation mechanism). + +The series is not yet a time series, because it does not record the time when the data have been stored. In order to have a time series, the user has to append the series to a `TimeSeries` object, which can contain more than one series, synchronizing them with time. + + +# 3. Cross section objects + +A `CrossSection` object retrieves values from each agent or object contained in a Java collection. If these agents or objects implement the `I*Source` interface, data are read directly; otherwise they are collected through a type specific reflector (`*Invoker`). + +At every update the cross section refreshes its current data cache and creates dynamically a new array of values, with the same dimension as the source collection. Differently from a `Series`, no memory of the old readings is preserved. + +The `CrossSection` class provides four implementations to natively support the main Java data types. The available implementations are: + +* `CrossSection.Double`, which implements the `IDoubleArraySource` interface; +* `CrossSection.Float`, which implements the `IFloatArraySource` interface; +* `CrossSection.Integer`, which implements the `IIntArraySource` interface; +* `CrossSection.Long`, which implements the `ILongArraySource` interface. + +So, for instance, a cross section reading float values has to be created using the `CrossSection.Float` constructor. Each of the four classes implements a specific `I*ArraySource` interface, in order to provide an array of the specific data type for further manipulation by other statistical objects (see below the encapsulation mechanism). + +If the user wants to collect data only from agents with particular characteristics, she can adopt the `ICollectionFilter` interface.
Passing to the cross section an object with the `ICollectionFilter` interface (via the `setFilter()` method), it collects only the values from the agents filtered by the custom filter. + +If, for instance, we would like to compute the average income of only the "adult" agents in the agent list, we have to define a filter as follows: +```java +public class Filter implements ICollectionFilter { + + public boolean isFiltered(Object object) { + + return ( ((MyAgent) object).age >= 18 ); + + } +} +``` + +Passing an instance of the `Filter` class to the cross section, we will obtain an array representing the income of the "adult" agents only. + + +# 4. Functions + +A data source can be processed by a `*Function` object, which applies the function and returns a value, via an `I*Source` interface. + +The functions contained by the `microsim.statistics.functions` package are divided in two main groups: + +1. The `*ArrayFunction` objects work with `I*ArraySource` sources which are refreshed at every `updateSource()` call. +2. The `*TraceFunction` objects work with single value sources (`I*Source`). Obviously a single value cannot be used to compute a statistic, so these functions trace the value over time. For instance, the `MeanTraceFunction` computes the average value, by storing the sum and the count of the values it receives over time. + +As an example, the following table describes some Array functions which operate on arrays of source values: + +| **Function** | **Description** | +| --- | --- | +| [MinArrayFunction](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/functions/MinArrayFunction.html) | Finds the lowest value in the array. | +| [MaxArrayFunction](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/functions/MaxArrayFunction.html) | Finds the highest value in the array.
| +| [MeanVarianceArrayFunction](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/functions/MeanVarianceArrayFunction.html) | Computes the average and the variance for the values in the array. | + +Finally, the following table gives an example of Trace functions which operate on single source values over time: + +| **Function** | **Description** | +| --- | --- | +| [MinTraceFunction](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/functions/MinTraceFunction.html) | Checks the source value over time keeping the lowest value ever received. | +| [MaxTraceFunction](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/functions/MaxTraceFunction.html) | Checks the source value over time keeping the highest value ever received. | +| [MultiTraceFunction](https://raw.githack.com/jasmineRepo/JAS-mine-core/master/microsim-core/doc/microsim/statistics/functions/MultiTraceFunction.html) | Computes the minimum, maximum, sum, mean and variance of the present and past readings by storing the sums and the count of the values received over time. | + + +# 5. The encapsulation system + +The `I*Source` and `I*ArraySource` interfaces are used to sequentially encapsulate different computational operations. Every time an object implements one of those interfaces it can be inserted in the encapsulation stack as a source of data used by a subsequent object in the stack. The encapsulation allows an infinite number of operations to be sequentially executed, with a single update operation (see below). + +It is important to point out that if an object requires a single value as input, it must receive data from an `I*Source` object, while if an object requires an array as input, it must receive data from an `I*ArraySource` object. Thus, Series and CrossSections must follow in the stack an `I*Source` object, while can provide data to Functions. 
+ +As an example, suppose you want to compute, at every simulation time step, the moving average of the mean value of the agents' income. This value might be useful, for instance, to understand if the simulation has reached a stationary state. + +In order to obtain the moving average we need to perform the following tasks, at each simulation time step: + +1. collect data from all the agents contained in a list; +2. compute the average value of the collected data; +3. store the value into a time series object; +4. using the series, compute the current moving average. + +Thanks to the encapsulation system we can create a stack of operations and then obtain the value simply invoking one method. + +The figure below shows how to build the moving average computer: + +![JAS-mine stats encapsulation](https://www.microsimulation.ac.uk/wp-content/uploads/2019/06/JAS-mine-stats.png) + +Don't worry! The code to build this operation is simpler than its visual representation, as shown by the following instructions: +```java +CrossSection.Double crossIncome = new CrossSection.Double(agentList, "income", false); +Series.Double seriesMeanIncome = new Series.Double(new MeanFunction(crossIncome)); +MovingAverageArrayFunction fMAIncome = new MovingAverageArrayFunction(seriesMeanIncome, 3 /*moving average window*/); +``` + + +# 6. How statistics are updated + +If the user had to update all the elements in the encapsulation system, the system would be very complex to manage. In the previous example, the user would have to update the `crossIncome` object, then the `seriesMeanIncome` series and finally the `fMAIncome` object, to obtain a new reading of the moving average. + +Fortunately, JAS-mine automatically updates all statistical objects, using the `IUpdatableSource` interface. Each statistical object that retrieves data from an `I*Source` object checks if the source implements the `IUpdatableSource` interface and, if it does, updates it before reading the data.
+ +Through this method, each object in the stack is recursively updated. This makes statistics very easy to manage, but it may cause some problems when the same source is used more than once in the stack, as it would be updated twice or more. Imagine a situation in which a time series is forced to be updated twice in the same simulation step: it would append twice the current data. Even worse, if a series included in a `TimeSeries` object is updated twice in the same simulation step, it will go out of synchronization with the `TimeSeries`, and result in a runtime error. + +JAS-mine takes care of this problem by checking the simulation time before invoking the `updateSource()`, and ignoring objects which have been already updated. Obviously this choice does not permit refreshing data more than once per simulation step. In order to bypass this constraint, the user has to explicitly set to false the `checkingTime` property of the statistical object: +```java +Series.Long s = new Series.Long(anAgent, "aLongVariable", false); +s.setCheckingTime(false); +``` + +Summarizing the updating mechanism, we can enumerate the following rules of thumb: + +1. Each statistical object checks if the source implements the `IUpdatableSource` interface and, if it does, invokes the `updateSource()` method before reading the data. +2. When updated, each statistical object checks the current simulation time and performs the update only if the time is different from the last update time. +3. In order to force a statistical object to bypass time checking, its `checkingTime` property must be explicitly set to false, using the `setCheckingTime(false)` instruction. \ No newline at end of file diff --git a/documentation/wiki/overview/country-variants.md b/documentation/wiki/overview/country-variants.md new file mode 100644 index 000000000..af49f2a19 --- /dev/null +++ b/documentation/wiki/overview/country-variants.md @@ -0,0 +1,5 @@ +# Country Variants + +!!!
warning "In progress" + This page is under development. Contributions welcome — + see the [Developer Guide](../developer-guide/index.md) for how to contribute. diff --git a/documentation/wiki/overview/how-to-cite.md b/documentation/wiki/overview/how-to-cite.md new file mode 100644 index 000000000..6dc3660e4 --- /dev/null +++ b/documentation/wiki/overview/how-to-cite.md @@ -0,0 +1,16 @@ +# How to Cite SimPaths + +# 1 SimPaths reference paper + +If you use SimPaths or derived work, please cite: + +Bronka P, van de Ven J, Kopasker D, Katikireddi SV, Richiardi M (2025). [SimPaths: an open-source microsimulation model for life course analysis](https://microsimulation.pub/articles/00318). *International Journal of Microsimulation*, 18(1): 95-133. + + +# 2 Further references + +- van de Ven J, Bronka P, Richiardi M (2025). [Welfare effects of social care policies](https://www.microsimulation.ac.uk/publications/publication-588564/). CeMPA WP 5/25. +- van de Ven J, Bronka P, Richiardi M (2024). [The life course effects of care](https://www.microsimulation.ac.uk/publications/publication-578383/). CeMPA WP 7/24. +- Richiardi M, Bronka P, van de Ven J (2025). [Attenuation and reinforcement mechanisms over the life course](https://www.sciencedirect.com/science/article/pii/S0167268125000319). *Journal of Economic Behavior & Organization*, 231: 106911. +- Kopasker D, Bronka P, Thomson RM, Khodygo V, Kromydas T, Meier P, Heppenstall A, Bambra C, Lomax N, Craig P, Richiardi M, Katikireddi SV (2024). [Evaluating the influence of taxation and social security policies on psychological distress: a microsimulation study of the UK during the COVID-19 economic crisis](https://doi.org/10.1016/j.socscimed.2024.116953). *Social Science and Medicine*: 116953. +- Thomson RM, Kopasker D, Bronka P, Richiardi M, Khodygo V, Baxter AJ, Igelström E, Pearce A, Leyland AH, Katikireddi SV (2024). 
[Short-term impacts of Universal Basic Income on population mental health inequalities in the UK: A microsimulation modelling study](https://journals.plos.org/plosmedicine/article?id=10.1371/journal.pmed.1004358). *PLOS Medicine*. diff --git a/documentation/wiki/overview/index.md b/documentation/wiki/overview/index.md new file mode 100644 index 000000000..e86c068f3 --- /dev/null +++ b/documentation/wiki/overview/index.md @@ -0,0 +1,15 @@ +# Overview + +SimPaths is a family of models for individual and household life course events, all sharing common components. The framework is designed to project life histories through time, building up a detailed picture of career paths, family (inter)relations, health, and financial circumstances. It builds upon standardised assumptions and data sources, which facilitates adaptation to alternative countries. + +Although differences in the main structure with the [EU models](https://github.com/centreformicrosimulation/SimPathsEU) are minimal, this documentation is specific to the UK model. Careful attention is paid to model validation, and sensitivity of projections to key assumptions. + +The modular nature of the SimPaths framework is designed to facilitate analysis of alternative assumptions concerning the tax and benefit system, sensitivity to parameter estimates, and alternative approaches for projecting labour/leisure and consumption/savings decisions. 
+ +## Sections + +- [Model Description](model-description.md) — what SimPaths models and why +- [Simulated Modules](simulated-modules.md) — the behavioural modules +- [Model Parameterisation](parameterisation.md) — how parameters are specified +- [Country Variants](country-variants.md) — adapting SimPaths to other countries +- [How to Cite](how-to-cite.md) — citation guidance and published research diff --git a/documentation/wiki/overview/model-description.md b/documentation/wiki/overview/model-description.md new file mode 100644 index 000000000..a1ac089cc --- /dev/null +++ b/documentation/wiki/overview/model-description.md @@ -0,0 +1,21 @@ +# Model Description + +SimPaths is a fully open-source structural dynamic microsimulation framework, designed to facilitate experimentation with alternative model assumptions. It is coded in Java using the [JAS-mine core](https://github.com/jasmineRepo/JAS-mine-core) and [JAS-mine GUI](https://github.com/jasmineRepo/JAS-mine-gui) simulation libraries. SimPaths models are currently estimated for the United Kingdom and Italy, and are under development for Hungary, Poland, and Greece. + +SimPaths implements a hierarchical architecture where individuals are organised in benefit units (for fiscal purposes), and benefit units are organised in households. The model projects data at yearly intervals, reflecting the yearly frequency of the survey data used to estimate model parameters. The model is composed of eleven modules: + +1. Ageing +2. Education +3. Health +4. Family composition +5. Social care +6. Investment income +7. Labour income +8. Disposable income +9. Consumption +10. Mental health +11. Statistical display + +Each module is composed of one or more processes; for example, the ageing module contains ageing, mortality, child maturation, and population alignment processes. Empirical specification of dynamic processes makes extensive use of cross-module characteristics (state variables). 
A graphical representation of the simulated modules is shown below: + +![model_structure](https://github.com/centreformicrosimulation/SimPaths/assets/56582427/d4c773a2-b720-4546-bca6-c76d07282dc4) \ No newline at end of file diff --git a/documentation/wiki/overview/parameterisation.md b/documentation/wiki/overview/parameterisation.md new file mode 100644 index 000000000..f951f7b8c --- /dev/null +++ b/documentation/wiki/overview/parameterisation.md @@ -0,0 +1,66 @@ +# Model Parameterisation + +The estimates for the utility functions used in the labour supply model are separately described in Richiardi, M. and He, Z. (2021), “_No one left behind: The labour supply behaviour of the entire Italian population_”, Centre for Microsimulation and Policy Analysis, mimeo. + +The estimates for the psychological distress models are separately described in Kopasker, D., et al. "_Evaluating the influence of taxation and social security policies on psychological distress: a microsimulation study of the UK during the COVID-19 economic crisis._" Social Science & Medicine (2024): 116953. + +The model has been parametrised for the UK using data described in the [data section](https://github.com/centreformicrosimulation/SimPaths/wiki/4.-Data). + +The most recent parametrisation of the model is [stored on Github](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input). + + + +## 1. Description of the tax and benefit system display + +Description of the tax and benefit system is provided through UKMOD output files stored in the [EUROMODoutput](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input/EUROMODoutput/) folder. Version developed on the basis of test data, which can be shared on GitHub, is available in the [training subfolder](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input/EUROMODoutput/training). + +To learn more about UKMOD, visit [UKMOD's website](https://www.microsimulation.ac.uk/ukmod/). + + + +## 2. 
Model parameters + +Source: [Github](https://github.com/centreformicrosimulation/SimPaths/tree/develop/input). + +**align_ files** +Files listed below contain alignment targets. + +* align_educLevel.xlsx +* align_employment.xlsx +* align_popProjections.xlsx +* align_student_under30.xlsx + +**projections_ files** +Files listed below contain demographic projections. + +* projections_fertility.xlsx +* projections_mortality.xlsx + +**reg_ files** +Files listed below contain regression estimates for specific processes described in the [model structure](https://github.com/centreformicrosimulation/SimPaths/wiki/1.-Model-structure) section. + +* reg_RMSE.xlsx +* reg_childcarecost.xlsx +* reg_education.xlsx +* reg_employmentSelection.xlsx +* reg_fertility.xlsx +* reg_health.xlsx +* reg_health_mental.xlsx +* reg_home_ownership.xlsx +* reg_income.xlsx +* reg_labourCovid19.xlsx +* reg_labourSupplyUtility.xlsx +* reg_leaveParentalHome.xlsx +* reg_partnership.xlsx +* reg_retirement.xlsx +* reg_socialcare.xlsx +* reg_unemployment.xlsx +* reg_wages.xlsx + +**scenario_ files** +Files listed below contain parameters reflecting specific modelling assumptions. + +* scenario_CPI.xlsx +* scenario_employments_furloughed.xlsx +* scenario_parametricMatching.xlsx +* scenario_retirementAgeFixed.xlsx diff --git a/documentation/wiki/overview/simulated-modules.md b/documentation/wiki/overview/simulated-modules.md new file mode 100644 index 000000000..b16eaa480 --- /dev/null +++ b/documentation/wiki/overview/simulated-modules.md @@ -0,0 +1,406 @@ +# Simulated Modules + +## 1. Ageing + +The first simulated process in each period increments the age of each simulated person by one year. Any dependent child that reaches an exogenously assumed “age of independence” (18 years-of-age in the parameterization for the UK) is extracted from their parental benefit unit and allocated to a new benefit unit. 
Individuals are then subject to a risk of death, based on age, gender and year specific probabilities that are commonly reported as components of official population projections. Death is simulated at the individual level but omitting single parent benefit units (to avoid the creation of orphans). + +_Alignment_ + +Population alignment is performed to adjust the number of simulated individuals to national population projections by age, gender, region, and year. Alignment proceeds from the youngest to the oldest age described by national population projections. Each age is considered in two discrete steps. First, within each age-gender-region-year subgroup, the simulated number of individuals is compared against the associated population projection. Regions with too few simulated individuals (relative to the respective target) are partitioned from those with too many. Net “domestic migration” is then projected by moving individuals from regions with too many simulated people to those with too few, until all options for (net) domestic migration are exhausted. All migratory flows are simulated at the benefit unit level, with reference to the youngest benefit unit member. + +Following domestic migration, remaining disparities between simulated and target population sizes are adjusted to reflect international immigration (if the simulated population is too small), or emigration and death (if the simulated population is too large). Like domestic migration, international migration is simulated net of opposing flows and at the benefit unit level with reference to the youngest benefit unit member. Death is simulated in preference to international emigration for population alignment for all ages above an exogenously imposed threshold (65 for the UK). + +Except for the distinction between age, gender, region, and year, all transitions simulated for population alignment are randomly distributed. 
This means that the model does not reflect, for example, the higher incidence of international emigration among prior international immigrants. Furthermore, the model projects international immigration by cloning existing benefit units without taking into consideration any systematic disparities between the domestic and migrant populations, including with regard to their respective financial circumstances. + +_Leaving parental home_ +Individuals who have recently attained the assumed age of independence and were moved to separate benefit units are evaluated to determine if they leave their parental home. Any individual still in education is assumed to remain a member of their parental household. For mature children not in education, the probability of leaving their parental home is based on a probit model conditional on gender, age, level of education, lagged employment status, lagged household income quintile, region, and year (to reflect observed time trends). Mature children who are projected to remain in their parental homes may leave in any subsequent year. + + + +## 2. Education +The education module determines transitions into and out of student status. Students are assumed not to work and therefore do not enter the labour supply module. Individuals who leave education have their level of education re-evaluated and can become employed. + +
+ +Education + +_Student status_ + +Individuals leave continuous full-time education during an exogenously assumed age band (16 to 29 for the UK). The probability of leaving continuous full-time education within this age band is described by a probit model conditional on gender, age, mother’s education level, father’s education level, region, and year. + +Individuals who are not in education may re-enter education within another exogenously assumed age band (16 to 45 for the UK). In this case, the probability of re-entering education is described by a probit model conditional on gender, age, lagged level of education, lagged employment status, lagged number of children in the household, lagged number of children aged 0-2 in the household, mother’s and father’s education levels, region, and year. + +Students are considered not to work. Those who return to education can leave again in any subsequent year. + +_Educational level_ + +Individuals who cease to be students are assigned a level of education based on an ordered probit model that conditions on gender, age, mother’s and father’s education level, region, and year. The education level of individuals who exit student status after re-entering education may remain unchanged or increase but cannot decrease. + + + +## 3. Health + +The health module projects an individual’s health status, comprising both self-rated general health and mental health metrics (based on a clinically validated measure of psychological distress using a Likert scale and a caseness indicator), and determines whether an individual is long-term sick or disabled (in which case, he/she is not at risk of work and may require social care). + + +
+ 3.1 Physical health + +Physical health status is projected on a discrete 5-point scale, designed to reflect self-reported survey responses (between “poor” and “excellent” health). Physical health dynamics are based on an ordered probit, distinguishing those still in continuous education. For continuing full-time students, the ordered probit conditions on gender, age, lagged benefit unit income quintile, lagged physical health status, region, and year. The same variables are considered for individuals who have left continuous education, with the addition of education level, lagged employment status, and lagged benefit unit composition. + +Health (self-rated) + +
+ + +
+ 3.2 Long-term sick and disabled + +Any individual aged 16 and above who is not in continuous education can become long-term sick or disabled. The probability of being long-term sick or disabled is described by a probit equation defined with respect to lagged disability status, prevailing and lagged physical health status, gender, age, education, income quintile, and lagged family demographics. + +Disability + +This Disability module is integrated in the `Person.health()` (Physical health) method in the current SimPaths model. +We split disability out for illustration purpose. + +
+ + +
+ 3.3 Psychological distress + +* Psychological distress 1 (baseline level and caseness) +In each simulation cycle, a baseline level of psychological distress for individuals aged 16 and over is determined using the 12-item General Health Questionnaire (GHQ-12). Two indicators of psychological distress are computed: a Likert score, between 0 and 36, estimated using a linear regression model; and a dichotomous indicator of the presence of potentially clinically significant common mental disorders is obtained using a logistic regression model. Both specifications are conditional on the lagged number of dependent children, lagged health status, lagged mental health, gender, age, level of education, household composition, region, and year. + +Mental health (levels)\ +**Figure, psychological distress in levels** + + +* Psychological distress 2 (impact of economic transitions and exposure to the Covid-19 pandemic) +The baseline measures of the level and caseness of psychological distress described above are modified by the effects of economic transitions and non-economic exposure to the Covid-19 pandemic. Fixed effects regressions are used to estimate the direct impact of transitions from employment to non-employment, non-employment to employment, non-employment to long-term non-employment, non-poverty to poverty, poverty to non-poverty, and poverty to long-term poverty, as well as changes in growth rate of household income, a decrease in household income, and non-economic effect of the exposure to Covid-19 pandemic in years 2020 and 2021. The effects of economic transitions are estimated on pre-pandemic data to ensure validity in other periods. The non-economic effects of the pandemic are estimated using a multilevel mixed-effects generalized linear model. + +Mental health (cases)\ +**Figure, psychological distress in cases** + +
+ + +## 4. Family composition + +The family composition module is the principal source of interactions between simulated agents in the model. The module projects the formation and dissolution of cohabiting relationships and fertility. Where a relationship forms, then spouses are selected via a matching process that is designed to reflect correlations between partners’ characteristics observed in survey data. The proportion of the population in a cohabiting relationship is, by default, aligned to the population aggregate in the years for which observational data is available, to account for changes in household structure introduced by the population alignment. + +Females in couples can give birth to a (single) child in each simulated year, as determined by a process that depends on a range of characteristics including age and presence of children of different ages in the household. In case of divergence from the officially projected number of newborns, fertility rates are adapted by an alignment process to match population projections for newborn children distinguished by gender, region, and year. + + + +
+
+ 4.1 Family composition module code structure +
+Family composition module overview + +This figure illustrates the timeline of the family composition module as defined by the `buildSchedule()` method in the SimPathsModel class. +Every year the model conducts union matching to form households and benefit units. Specifically, the model executes: + +1. Process `UpdatePotentialHourlyEarnings`, which calls `updateFullTimeHourlyEarnings()` method (`Person` class) and computes `fullTimeHourlyEarningPotentials` (used in union matching process). +2. Process `Cohabitation`, which calls `cohabitation()` method (`Person` class) and computes a group of persons to be considered in the union matching process (a map of `personsToMatch`). +3. Process `CohabitationAlignment`, which is NOT called by default. +4. Process `PartnershipDissolution` that determines which couples dissolve their partnership and can be considered in the concurrent union matching process. + + +`onEvent()` method, **case UnionMatching**. There are three union matching approaches (SBAM, Parametric, ParametricNoRegion) in the SimPaths model. By default, the model uses the ParametricNoRegion approach, which calls unionMatching first and then the unionMatchingNoRegion methods to determine matched pairs of males and females. + +`unionMatching()` method clears existing matched pairs first, then evaluates union matching for candidates in the same region. For each region, the method: + +1. Creates fresh empty sets `unmatchedMales` and `unmatchedFemales`, +2. Populates these sets by copying the current period’s eligible-to-match males and females stored in the `personsToMatch` group, +3. Stores unmatched males and females in a pair of "unmatched", +4. Sends every unmatched pair to the `evalMatches()` method, which creates matched couples and removes them from the `personsToMatch` group. + +`unionMatchingNoRegion()` method is executed after the `unionMatching` method updates the `personsToMatch` group.
Then, this method executes union matching procedures again by relaxing the region restriction. Now all candidates in the `personsToMatch` group are evaluated in a national pool: + +1. For every region, "unmatchedMales" and "unmatchedFemales" are populated with reduced `personsToMatch` group of males and females. +2. All unmatched males and females are stored in pairs of unmatched, +3. These pairs of unmatched are sent to the `evalMatches()` method. + +`evalMatches()` method. It creates an object called `unionMatching` of the `UnionMatching` class and calls `unionMatching.evaluate("GM")` to execute the `GlobalMatching` algorithm, and during its execution `localMatch` is invoked for each selected pair, thereby populating `matchesHere` (which consists of matched pairs) inside the `unionMatching` object. This method then removes matched pairs from personsToMatch. We discuss the `UnionMatching` class in the next section. +
+ + +
+
+ 4.2 Partnerships and cohabitation + +Individuals aged 18 and over who do not have a partner may decide to enter a partnership based on the outcome of a probit model. For students, the probit conditions on gender, age, lagged household income quintile, lagged number of (all) dependent children, lagged number of children aged 0-2, lagged self-rated health status, region, and year. For non-students, the probit conditions on the same set of variables as for students, expanded to include level of education and lagged employment status. + +Individuals who enter a partnership are matched using either a parametric or non-parametric process, focussing exclusively on opposite-sex relationships. In the (default) parametric matching process, the model searches through the pools of males and females identified as cohabiting in each simulated period to minimise the distance between individual expectations, in terms of partner’s ideal earnings potential and age, and individual characteristics of each individual in the matching pool. The matching procedure (see `UnionMatching` class) prioritizes matching individuals within regions, but if the sufficient quantity and / or quality of matches cannot be achieved, matching is performed nationally. In contrast, the non-parametric process uses an iterative proportional fitting procedure to replicate the distribution of matches observed in survey data between different types of individuals, where a type is defined as a combination of gender, region, education level, and age. + +
+ + +
+
+ 4.3 Cohabitation method code structure + +The cohabitation method (in Person class) computes a group of persons to be considered in the union matching process (a map of personsToMatch). + +Partnership method + + + +
+ + +
+
+ 4.4 UnionMatching class code structure + +The `UnionMatching` class receives sets of unmatched males and females (passed as unmatched, constructed from personsToMatch) and forms new couples by creating benefit units and households via male.setupNewBenefitUnit(female, true). + +UnionMatching + + +The key method, `evaluateGM()`, implements the global matching procedure as follows: + +1. Passes the current sets of unmatched males and females to the core `GlobalMatching.matching()` algorithm. +2. Constructs all feasible male–female candidate pairs using the GlobalMatchingPair class, where each pair stores: a male, a female, a matching score computed by `localGetValue(male, female)`. +3. Sorts the list of candidate pairs in ascending order of their matching score. +4. Iterates through the ranked candidate pairs from the lowest score upward. For each candidate pair, if both individuals are still unmatched, the pair is accepted: `localMatch(male, female)` is called and both individuals are removed from the availability sets. Candidate pairs involving already matched individuals are skipped. +5. For each accepted pair, `localMatch`: records the match, removes the individuals from the unmatched male and female sets, and updates the simulation state by creating a new benefit unit and household (the core purpose of the union-matching process). + + +`localMatch()`. This method is called as "match" in the `GlobalMatching.matching()` core algorithm. Its arguments (male, female) are candidates selected by GlobalMatching during execution. GlobalMatching.matching returns the unmatched ones (leftovers), and for each matched pair it calls `UnionMatching.match(agent1, agent2)`. This callback calls localMatch(male,female), which: + +1. adds the pair to "matches" (`matches.add(...)`), +2. removes males and females in matches from the UnionMatching object’s `unmatchedMales`/`unmatchedFemales` sets (which are references to the sets passed in from `SimPathsModel.evalMatches`). +3. 
updates the simulation state:\ +    3.1. set the female's region to be the male's region,\ +    3.2. years in a partnership (`male/female.detDcpyy`),\ +    3.3. household state via `male.setupNewBenefitUnit(female, true)`. + + +`localGetValue()`. This method is called as `getValue` in the `GlobalMatching.matching()` core algorithm. Its arguments (male, female) are candidate pairs considered by GlobalMatching. This method calculates the score for a pair of male and female for the sorting purpose in the `GlobalMatching.matching()`. Within the method: + +1. `ageDiff` is the male's age minus the female's age, +2. `potentialHourlyEarningsDiff` is the male's full time hourly earnings potential minus the female's. In SimPaths UK, this full time hourly earnings potential is updated earlier in the yearly schedule, via the `Person.updateFullTimeHourlyEarnings()` method in each period of the simulation, before cohabitation and union matching. +3. `earningsMatch` is calculated as `potentialHourlyEarningsDiff` minus `female.getDesiredEarningsPotentialDiff()`, where the latter is from the `setMarrageTargets()` method in the constructor for the `Person` class. +4. `ageMatch` is calculated as `ageDiff` minus `male.getDesiredAgeDiff()`. The latter is also from the constructor in `Person` class, `setMarrageTargets()` method. +5. If the male is not the female's father, and the female is not the male's mother, and `abs(ageMatch) < AGE_DIFFERENCE_INITIAL_BOUND`, and `abs(earningsMatch) < POTENTIAL_EARNINGS_DIFFERENCE_INITIAL_BOUND`, the `score = earningsMatch^2 + ageMatch^2`. Else, the score is set to positive infinity to exclude such a pair. + +
+ + +
+ +
+ 4.5 Partnership dissolution + +Partnership dissolution is modeled at the benefit unit level with the probability described by a probit model conditional on female partner’s age, level of education, lagged personal gross non-benefit income, lagged number of (all) children, lagged number of children aged 0-2, lagged self-rated health status, lagged level of education of the spouse, lagged self-rated health status of the spouse, lagged difference between own and spouse’s gross, non-benefit income, lagged duration of partnership in years, lagged difference between own and spouse’s age, lagged household composition, lagged own and spouse’s employment status, region, and year. + +_Alignment_ +The matching processes for new relationships outlined above fail to identify matches for all individuals flagged as entering a partnership by the related probit equations. This tends to bias the simulated population, resulting in an under-representation of partner couples. An alignment process is consequently used to match the rate of incidence of partner couples to survey targets. The alignment process works by adjusting the intercept of the probit relationships governing relationship formation, increasing the intercepts where the incidence of couples is too low. + +
+ + +
+ +
+ 4.6 Fertility + +Females aged 18 to 44 can give birth to a child whenever they are identified in a partnership. The probability of giving birth is described by a probit model conditional on a woman’s age, benefit unit income quintile, lagged number of children, lagged number of children aged 0-2, lagged health status of the woman, lagged partnership status for those in continuous education. For those not in continuous education, the probability of giving birth is described by a probit model conditional on a woman’s age, the fertility rate of the UK population, benefit unit income quintile, lagged number of children, lagged number of children aged 0-2, lagged health status of the woman, lagged partnership status, lagged labour market activity status, level of education, and region. The inclusion of the overall fertility rate allows the model to capture fertility projections for future years, whereas the overall change in projected fertility is distributed across individuals according to their observable characteristics. + +
+ +fertility module + +_Alignment_ +The number of projected births is aligned to the number of newborns supplied by the official projections used for population alignment. The alignment procedure randomly samples fertile women and adjusts the outcome of the fertility process until the target number of newborns has been met. + + +
+ +
+ + + + +## 5. Social care +The social care module projects provision and receipt of social care activities for people in need of help due to poor health or advanced age. The module is designed to distinguish between formal and informal social care, and the social relationships associated with informal care. The social care module accounts for the time cost incurred by care providers with respect to informal care, and the financial cost incurred by care receivers with respect to formal care. + +_Receipt of social care_ +The model distinguishes between individuals aged above and below an age threshold when projecting receipt of social care. This reflects the relatively high prevalence of social care received by older people, for whom more detailed information is often reported by publicly available data sources. + +_Receipt of social care among older people_ +For individuals aged above an exogenously defined threshold (65 years in the UK), the model begins by considering whether an individual is in need of care. This is simulated as a probit equation that varies by gender, education, relationship status, whether care was needed in the preceding year, self-reported health, and age. The probability of receiving care is projected using a similar set of explanatory variables. Where an individual is identified as receiving care, a multinomial logit equation is used to determine if the individual receives: i) only informal care; ii) formal and informal care; or iii) only formal care. This multinomial logit varies by education, relationship status, and age band in addition to a lag dependent variable. + +For individuals projected to receive informal care, a multi-level model is used to distinguish between alternative care providers. The first level considers whether a partner provides informal care, for individuals with partners. 
For individuals who receive social care from their partner, the second level uses a multinomial logit to consider whether they also receive care from a daughter, a son, or someone else (other). For individuals in receipt of informal care who do not have a partner caring for them, another multinomial logit is used to select from six potential alternatives that allow for up to two carers from “daughter”, “son”, and “other”. Log-linear equations are then used to project the number of hours of care received from each identified carer. Finally, hours of formal care are converted into a cost, based on the year-specific mean hourly wages for all social care workers. + +_Receipt of social care among younger people_ +Receipt of social care among individuals under the exogenously assumed age threshold is simulated using a more stylised approach to that described for older people, reflecting the less detailed data available for parameterization. In this case, the model focuses exclusively on informal social care for individuals simulated to be long-term sick or disabled. At the time an individual is projected to enter a disabled state, a probit equation is used to identify whether the individual receives informal social care. This identification is assumed to persist for as long as the person remains disabled. + +If an individual under age 65 is identified as receiving social care, then the time of care received is described by a log-linear equation. + +_Provision of social care_ +The model is adapted to project provision of social care by informal sector providers; the characteristics of formal sector providers of social care are beyond the current scope of the model. The approach adopted for simulating receipt of social care described above identifies the incidence and hours of informal social care that individuals are projected to receive. 
In the case of people over the assumed age threshold, it also identifies the relationship between those in receipt of informal social care and their informal care providers, and the persistence of those care relationships. These details consequently provide much of the information necessary to simulate provision of informal social care, in addition to the receipt of care. + +Nevertheless, the data sources for starting populations considered for SimPaths – with the notable exception of partners – generally omit social links that are implied to exist between informal social care providers and those receiving care. Specifically, links between adult children and their parents, and the wider social networks that often supply informal social care services are generally not recorded. The method that is used to project informal provision of social care is designed to accommodate limitations of the simulated data in a way that broadly reflects projection of social care receipt discussed above. + +Specifically, the model distinguishes between four population subgroups with respect to provision of informal social care: (i) no provision; (ii) provision only to a partner; (iii) provision to a partner and someone else; and (iv) provision but only to non-partners. For people who are identified as supplying informal care to their partner via the process described above, a probit equation is used to distinguish between alternatives (ii: provision only to partner) and (iii: provision to a partner and someone else). Similarly, for the remainder of the population, another probit equation is used to distinguish between alternatives (i) and (iv). A log linear equation is then used to project number of hours of care provided, given the classification of who care is provided to. + + + + + +## 6. Investment income + +The investment income module projects income from investment returns and (private) pensions. 
The approach taken to project these measures of income depends upon the model variant considered for analysis. Where consumption/savings decisions are simulated using a structural behavioural framework, then asset income is projected based on accrued asset values and exogenously projected rates of return. Alternatively, computational burden of model projections can be economised by proxying non-labour income without explicitly projecting asset holdings. + +_Retirement_ +Simulation of retirement varies slightly depending on the accommodation of forward-looking expectations. In both cases, retirement is possible for any adult above an assumed age threshold (50 in the parametrisation for the UK). When forward-looking expectations are implicit, entry to retirement is based on a probit model that controls for gender, age, education, lagged employment status, lagged (benefit unit) income quintile, lagged disability status, indicator to distinguish individuals in excess of state pension age (accounting for changes in the state pension age), region, and year. For couples, characteristics of the spouse (employment status, reaching retirement age) also affect the probability of retirement. When forward-looking expectations are explicit, then entry to retirement is considered to be a control variable. Retired individuals may receive pension income. + +_Private pension income_ +When wealth is implicit in the model, then private pension income is projected using a linear regression model that conditions on age, level of education, lagged household composition, lagged health status, lagged private pension income, region, and year for individuals who continue in retirement. 
For individuals entering retirement, the probability of receiving private pension income is first determined using a logit model that conditions on having reached the state pension age, level of education, lagged employment status, lagged household composition, lagged health status, lagged hourly wage potential, region, and year. The amount of pension income is projected using a linear regression model conditional on the same observed characteristics. + +When the simulation projects wealth explicitly, then an assumed fraction of benefit unit wealth at the time of retirement is converted into a life annuity, or joint-life annuity for adult couples. Annuity rates in the model are actuarially fair, given (cohort specific) mortality rates and an assumed internal rate of return. + +_Capital income_ +When wealth is not projected by the model, then the incidence of capital income among the simulated population aged 16 and over is based on probabilities described by a logit regression equation that varies by age, lagged health, lagged gross employment and capital income, region and year. For individuals not in continuous education, the list of explanatory variables for the logit equation also includes education status, lagged employment status, and lagged household composition. + +For individuals simulated to be in receipt of capital income, the amount of capital income is described by linear regression models that condition on gender, age, lagged health status, lagged gross employment income, lagged capital income, region, and year for individuals in continuous education. Individuals not in continuous education are also distinguished by their level of education, lagged employment status, and lagged household composition. + +When wealth is explicitly projected by the model, then capital income is the product of net asset holdings and an assumed rate of return. 
The rate of return varies by year, and by the value of benefit unit net wealth, $w_{i,t}$, as described by: + +$r_{i,t} = r_{a,t}$ if $w_{i,t} \ge 0$ and +$r_{i,t} = r_{dl,t} + (r_{du,t} - r_{dl,t}) \phi_{i,t}$ otherwise + +where $i$ denotes the benefit unit and $t$ denotes time. $1 \ge \phi_{i,t} \ge 0$ denotes the (bounded) ratio of benefit unit debt to full-time potential earnings. Assuming $r_{du,t} \ge r_{dl,t}$ reflects a ‘soft constraint’ where interest rates increase with indebtedness. + + +## 7. Labour income +The labour income module projects potential (hourly) wage rates for each simulated adult in each year and their associated labour activity. Given potential wage rates, hours of paid employment by all adult members of a benefit unit are generated. Labour (gross) income is then determined by multiplying hours worked by the wage rate. + +_Wage rates_ +Hourly wage rates are simulated for each adult in the model based on Heckman-corrected regressions stratified by gender and lagged employment status (distinguishing between employed and not-employed individuals) that include as explanatory variables, part-time employment identifiers, age, education, student status, parental education, relationship status, presence of children, health, and region. For individuals observed in employment in the previous year, lagged (log) hourly wage rates are also included as an explanatory variable. + +_Employment decisions_ +Two alternative methods for projecting employment decisions can be considered by the model. These alternatives are both designed to reflect the influence of financial incentives on behaviour and are distinguished by whether they reflect forward-looking expectations. + +The default specification of SimPaths projects labour supply using a non-forward-looking random utility model. The method projects labour supply as though employment decisions are made to maximise within-period benefit unit utility over a discrete set of labour/income alternatives. 
Given any labour alternative, labour income is computed by combining hours of work with the respective hourly wage rate. The utility of the benefit unit is calculated using a quadratic utility function and takes as arguments benefit unit disposable income and the number of hours worked by adult members. The UK labour supply model is estimated using UKMOD in combination with UKHLS data for 2019/20. The estimation employs an alternative-specific conditional logit approach (Stata command asclogit). Below a more detailed description of the model is provided. + +Labour-supply-flexible individuals are defined as those of working age (15-75 years) who are not students, not disabled (and not on disability benefits), not pensioners, and who do not have a partner or are partnered with a non-flexible individual. Couples are those where both partners satisfy the flexible-worker conditions. + +Formally, each labour-supply-flexible individual chooses among seven mutually exclusive alternatives: non-employment and six weekly hours categories. The definition of alternatives is as follows: +- Alternative 0: Weekly hours: 0 Category: Non-employment +- Alternative 1: Weekly hours: 10 Category: 6-15 hours +- Alternative 2: Weekly hours: 20 Category: 16-25 hours +- Alternative 3: Weekly hours: 30 Category: 26-35 hours +- Alternative 4: Weekly hours: 38 Category: 36-40 hours +- Alternative 5: Weekly hours: 45 Category: 41-49 hours +- Alternative 6: Weekly hours: 55 Category: 50+ hours + +In couples’ cases, the choice set expands into all combinations of the two partners’ options. + +The observed choice, together with simulated incomes and computed leisure, identifies the utility parameters. + +Leisure is computed as total weekly hours minus the worked-hours bracket minus hours spent providing care. The idea is that caregiving is a time cost and should reduce leisure in the utility function. + +Disposable income for each alternative is simulated using UKMOD as follows. 
+Hourly wages are predicted using a Heckman two-step selection model based on UKHLS-UKMOD panel data. The model is estimated separately for individuals with and without previous employment, and within each group separately for men and women. Predicted wages are used for all individuals as this approach performs better than the alternative one (using predicted wages for non-workers only). The predicted hourly wage is then used to compute employment income for each hour's alternative. + +To correctly simulate disposable income for each hours scenario, we follow these conventions: +- Benefits that UKMOD can simulate (e.g. Universal Credit, tax credits) ==> rely on UKMOD to produce for every hour's alternative. +- Benefits that UKMOD cannot simulate and that are dependent on employment income ==> set to zero for all counterfactual alternatives (otherwise they would appear only in the observed alternative and distort utility estimation). +- Benefits UKMOD cannot simulate and that are not dependent on employment income and are compatible with work ==> copy the observed amount to all alternatives. +- Benefits incompatible with work but received in the observed state ==> drop the individual from the estimation sample (e.g. this applies to some disability benefits, their presence implies some choices are not feasible). + +UKMOD calculates disposable income for each scenario, and the individual's observed choice identifies which of the seven alternatives they actually selected. 
+ +The estimation sample is split into subsamples, and the asclogit is used to estimate the marginal utilities of income and leisure and the preference shifters for each of these subsamples: +- Single females +- Single males +- Couples where both partners are flexible +- Singles with a non-flexible partner, females +- Single with a non-flexible partner, males +- Adult children, females +- Adult children, males +In practice, limited sample sizes forced us to pool the sexes for couples with one flexible partner and for adult children. + +Utility is modelled as a quadratic function of disposable income and leisure. +- In the models for single women and single men, a dummy for full-time work is included to capture any fixed utility shift associated with full-time employment. The specification also conditions on an individual-level characteristic, years spent in employment, to account for accumulated labour-market experience. +- In the couples’ model, full-time work dummies and previous labour-market experience variables are included for both partners. +- In the model for singles with a dependent non-flexible partner, men and women are estimated jointly, and differences by sex are accounted for by interacting the full-time work dummy with a sex dummy. +- In the model for adult children, the sample is also pooled, with an interaction between sex and full-time work, but previous labour-market experience is not included as it was not statistically significant. + +The model can also be directed to project labour and discretionary consumption to reflect forward-looking expectations for behavioural incentives. As for the implicit expectations case, the unit of analysis is the benefit unit. Incentives are translated into behaviour via an assumed intertemporal utility function. By default, the model adopts a nested constant elasticity of substitution (CES) utility function. 
+ + +Each adult is considered to have three alternative labour supply options, corresponding to full-time, part-time and non-employment. Labour supply and discretionary consumption are projected as though they maximise the assumed utility function, subject to a hard constraint on net wealth and assumed agent expectations. Expectations are “substantively rational” in the sense that uncertainty is characterised by the random draws that underlie dynamic projection of modelled characteristics. As no analytical solution to this problem exists, numerical solution methods are employed. + +The model proceeds in two discrete steps. The first step involves solution of the lifetime decision problem for any potential combination of agent specific characteristics, with solutions stored in a look-up table. The second step uses the look-up table as the basis for projecting labour supply and discretionary consumption. + +_Alignment_ +When the default specification of SimPaths for projecting labour supply is used, the estimated utility of single men, single women, and couples is adjusted to align the aggregate employment rate to the employment rate observed in the data in the validation period. The final adjustment value is used in the subsequent periods, for which no data is available. This procedure accounts for the existence of unemployment in the real economy and the fact that labour supply decisions simulated using the random utility model assume no constraints on labour demand in the economy. + + +## 8. Disposable income +Disposable income is simulated by matching each simulated benefit unit in each projected period with a donor benefit unit reported by a tax-benefit reference database, following the procedure described by [van de Ven et al. (2022)](https://www.iser.essex.ac.uk/wp-content/uploads/files/working-papers/cempa/cempa3-22.pdf). 
The database stores details of taxes and benefits alongside associated demographic and private income characteristics for a sample of benefit units. This database could be populated from a wide range of sources. The approach was originally formulated to draw upon output data derived from the UK version of EUROMOD (UKMOD), and then extended to accommodate projections from any EUROMOD country. + +The matching procedure for benefit units applies coarsened exact matching over a number of discrete-valued characteristics, followed by nearest-neighbour matching on a set of continuous variables. The nearest neighbour matching is performed with respect to Mahalanobis distance measures evaluated over multiple continuous valued characteristics. + +The default set of discrete value characteristics considered for matching includes age of the benefit unit reference person, relationship status, numbers of children by age, hours of work by each adult member, disability status, and informal social care provision. Similarly, the default set of continuous value matching characteristics includes original (pre-tax and benefit) income, second income (to allow for income splitting within couples), and formal childcare costs. + +Having matched a simulated benefit unit to a donor, disposable income is imputed via one of two methods. For benefit units with original income above a “poverty threshold”, disposable income is imputed by multiplying original income of the simulated benefit unit by the ratio of disposable to original income of the donor unit. For benefit units below the considered poverty threshold, disposable income is set equal to the (growth adjusted) disposable income of the donor. + +Finally, adjustments to account for public subsidies for the costs of (formal) social care are evaluated separately from the database approach described above, based on internally programmed functions. 
This is done because public subsidies for social care are not always included in database sources (e.g. tax-benefit models) considered for analysis. + + + +## 9. Consumption +Given disposable income and household demographics, the consumption module projects measures of benefit unit expenditure. Where the model projects wealth, then a simple accounting identity is used to track the evolution of benefit unit assets through time. A regression-based homeownership process predicts if the primary residence is owned by either of the responsible adults in a benefit unit, in which case the benefit unit is considered to own its home. + +_Non-discretionary expenditure_ +The model can project two forms of non-discretionary benefit unit expenditure: formal social care costs and formal childcare costs. Social care costs are projected based on projections of hours of formal social care received and assumed hourly wage rates for social care workers. + +Childcare costs are projected using a double-hurdle model, characterised by a probit function describing the incidence of formal childcare costs and a linear least-squares regression equation describing the value of childcare costs when these are incurred. Both equations include the same set of explanatory variables describing the number and age of dependent children in a benefit unit, the relationship status and employment status of adults in the benefit unit, whether any adult in the benefit unit is higher educated, region, and year. + +_Discretionary consumption_ +The model can be directed to project employment and discretionary consumption jointly to reflect forward-looking expectations for behavioural incentives. The projection of discretionary consumption varies depending on whether forward-looking expectations are chosen to be explicit or implicit within a simulation. 
+ +By default, yearly equivalised disposable income is calculated by adjusting disposable income to account for benefit unit demographic composition via the modified OECD scale. Equivalised consumption is set equal to equivalised disposable income for retired individuals, and to disposable income adjusted by a fixed discount factor to account for an implicit savings rate otherwise. The assumed savings rate, in turn, influences simulated capital income. + +When expectations are explicit, the model evaluates solutions to the lifetime decision problem in the form of a look-up table when directed to reflect forward-looking expectations for behavioural incentives. In the case of discretionary consumption, the look-up table stores the ratio of consumption to “cash on hand”, where cash on hand is the sum of net wealth, disposable income, and available lines of credit. This ratio has the advantage that it is bounded between zero and one, which facilitates the computational routines and consideration of selected policy counterfactuals. + +_Assets accumulation_ +Net wealth is the key transition mechanism that balances intertemporal behavioural incentives when forward-looking expectations are treated explicitly by the model. In this case, dynamic evolution of wealth in most periods is described by the accounting identity: + +$w_{i,t} = w_{i,t-1} + y_{i,t} - c_{i,t} - \bar{c}_{i,t}$ + +where $w_{i,t}$ denotes the net wealth of benefit unit $i$ in period $t$, $y_{i,t}$ disposable income, $c_{i,t}$ discretionary consumption, and $\bar{c}_{i,t}$ non-discretionary expenditure. The only departures from the equation above are at the time of retirement if $w_{i,t} > 0$, when a fixed fraction of net wealth is converted into a fixed life annuity. + + +_Homeownership_ +Although net wealth is not disaggregated in the model, the incidence of homeownership is reflected, as this is used as an input for the projection of psychological distress. 
Homeownership is evaluated at the benefit unit level, by considering if at least one of the adult occupants is classified as a homeowner. At the individual level, homeownership is determined using a probit regression model conditional on gender, age, lagged employment status, education level, lagged self-rated health, lagged benefit unit income quintile, lagged gross personal non-employment non-benefit income, region, lagged household composition, lagged spouse’s employment status, and a time trend. + + + + +## 10. Mental health + +A secondary subjective-wellbeing process adjusts estimates obtained by the primary process to account for the effect of exposure to labour market transitions, such as moving in and out of employment and/or poverty. +Specifically, in the SimPaths model, method `Person.healthMentalHM2level()` corresponds to the Step 2 of such a mental health evaluation, as illustrated in the right half of the Figure 1 below: + +Mental health (levels)\ +**Figure 1, psychological distress in levels** + +The first and second processes for mental health in cases are combined, as illustrated in Figure 2, where processes `HM2CasesFemales` and `HM2CasesMales` correspond to the second process. + +Mental health (cases)\ +**Figure 2, psychological distress in cases** + + + + +## 11. Statistical display +At the end of each simulated year, SimPaths generates a series of year specific summary statistics. All of these statistics are saved for post-simulation analysis, with a subset of results also reported graphically as the simulation proceeds. + diff --git a/documentation/wiki/research/index.md b/documentation/wiki/research/index.md new file mode 100644 index 000000000..3ed9994cb --- /dev/null +++ b/documentation/wiki/research/index.md @@ -0,0 +1,16 @@ +# Research + +## 1. SimPaths reference paper + +If you use SimPaths or derived work, please cite: + +Bronka P, van de Ven J, Kopasker D, Katikireddi SV, Richiardi M (2025). 
[SimPaths: an open-source microsimulation model for life course analysis](https://microsimulation.pub/articles/00318). *International Journal of Microsimulation*, 18(1): 95-133. + + +## 2. Further references + +- van de Ven J, Bronka P, Richiardi M (2025). [Welfare effects of social care policies](https://www.microsimulation.ac.uk/publications/publication-588564/). CeMPA WP 5/25. +- van de Ven J, Bronka P, Richiardi M (2024). [The life course effects of care](https://www.microsimulation.ac.uk/publications/publication-578383/). CeMPA WP 7/24. +- Richiardi M, Bronka P, van de Ven J (2025). [Attenuation and reinforcement mechanisms over the life course](https://www.sciencedirect.com/science/article/pii/S0167268125000319). *Journal of Economic Behavior & Organization*, 231: 106911. +- Kopasker D, Bronka P, Thomson RM, Khodygo V, Kromydas T, Meier P, Heppenstall A, Bambra C, Lomax N, Craig P, Richiardi M, Katikireddi SV (2024). [Evaluating the influence of taxation and social security policies on psychological distress: a microsimulation study of the UK during the COVID-19 economic crisis](https://doi.org/10.1016/j.socscimed.2024.116953). *Social Science and Medicine*: 116953. +- Thomson RM, Kopasker D, Bronka P, Richiardi M, Khodygo V, Baxter AJ, Igelström E, Pearce A, Leyland AH, Katikireddi SV (2024). [Short-term impacts of Universal Basic Income on population mental health inequalities in the UK: A microsimulation modelling study](https://journals.plos.org/plosmedicine/article?id=10.1371/journal.pmed.1004358). *PLOS Medicine*. diff --git a/documentation/wiki/user-guide/gui.md b/documentation/wiki/user-guide/gui.md new file mode 100644 index 000000000..cc5792fe8 --- /dev/null +++ b/documentation/wiki/user-guide/gui.md @@ -0,0 +1,91 @@ +# The Graphical User Interface + +# 1. Introduction +In this section, we discuss the different components that make up the JAS-mine Graphical User Interface (GUI). 
+ +![SimPaths GUI Screenshot](https://raw.githubusercontent.com/centreformicrosimulation/SimPaths/develop/documentation/figures/GUI/SimPaths%20GUI.png) + +JAS-mine supports three different types of execution mode: [interactive mode](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/start/), [batch mode](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/start/) and [multi-run mode](https://www.microsimulation.ac.uk/jas-mine/resources/tutorials/run-a-simulation-many-times/). The most common mode for prototyping a JAS-mine project, developing an intuition about how it works and demonstrating it to an audience is the interactive mode. This features a graphical user interface, where model parameters can be set and updated during a simulation run, and pre-determined graphical objects can be displayed to allow for real-time inspection of a number of the model's output quantities. + +The interactive mode is launched by default when executing the Start class of a standard JAS-mine project (as created using the JAS-mine Plugin for Eclipse IDE tool). In order to turn off the GUI when using a standard JAS-mine project, the user should go to the main method in the project's Start class, and ensure that the Boolean variable `showGui` (defined in the first line of the main method) is set to false: +```java +public static void main(String[] args) { + + boolean showGui = true; // Toggle GUI on (off) by setting showGui to true (false) + SimulationEngine engine = SimulationEngine.getInstance(); + MicrosimShell gui = null; + if (showGui) { + gui = new MicrosimShell(engine); + gui.setVisible(true); + } + engine.setBuilderClass(StartDemo.class); + engine.setup(); +} +``` + +# 2. 
Components + +## 1.1 Menus + +![JAS-mine GUI Menu](https://www.microsimulation.ac.uk/wp-content/uploads/documentation/JAS-mine-GUI-menu.png) + +There are three menu tabs at the top of the JAS-mine GUI: + +* Simulation – this menu contains a list of the buttons that appear in the Simulation Control Pane below the Menu tabs, plus the simulation's engine status (which includes information about the simulation run number, random number seed and event list references). +* Tools – contains the '**[Database explorer](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/queries/)**' that opens up the web browser to interact with the simulation's input or output databases (if any). This also includes the 'Print windows positions' tool that prints to the output stream window the co-ordinates of the corner positions of all widgets (parameter boxes and graphs) in the main graphical window. +* Help – features the 'About JAS-mine' option that opens up a window containing credits for JAS-mine and the terms of the GNU LESSER GENERAL PUBLIC LICENSE, in addition to information about the system environment being used to run JAS-mine simulations such as the memory allocated to the Java Virtual Machine and the version of Java. + +## 1.2 Simulation Control Pane + +![JAS-mine Buttons](https://raw.githubusercontent.com/centreformicrosimulation/SimPaths/develop/documentation/figures/GUI/SimPaths-Buttons.png) + +Below the Menu tabs are the simulation control buttons. The user can easily discover the meaning of each of the buttons by hovering the mouse pointer over each button. We describe the actions associated with each button below, ordered from left to right: + +* **Restart simulation model** +* **Build simulation model** – builds the simulation model so that it can be executed. +* **Start simulation** – starts the execution of the simulation (note that the model must be built before it can be executed – this is done by clicking on the 'Build simulation model' button to the immediate left). 
+* **Execute next scheduled action** – if the simulation is paused (see Pause button to the immediate right), by clicking on this button, the user can execute the next action scheduled in the simulation. This allows the user to perform a step-by-step execution of the simulation. To continue the simulation as normal, press the 'Start simulation' button again. +* **Pause simulation** – pauses the simulation model. Press the 'Start simulation' button to continue the simulation. +* **Update parameters in the live simulation** – if the user desires to change some of the [GUI parameters](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/gui-parameters/) (see 'Parameter Boxes' below) while the simulation is still running, first update the values of the GUI parameters and then click on this button. This is useful, for example, in seeing the impact of step changes in the parameters on the equilibrium state of a simulation model. Note that only parameters that are accessed by the model during the simulation after the update button has been clicked can have any impact on the simulation. For example, if a simulation uses a GUI parameter to determine the size of an agent population at the start of the simulation, and the population is subsequently evolved, the population size will not change despite the population size parameter having been updated if this parameter is only ever used by the model at the start of the simulation. In order to have a population size parameter that affects population size during the simulation, the model developer would need to explicitly code the simulation to check the size of the population at scheduled times during the simulation, and delete / create agents if the population size differs from the population size parameter. 
+ +In addition, the toggle box **'Turn off database'** disables JAS-mine's [object-relational mapping](https://www.microsimulation.ac.uk/jas-mine/resources/focus/object-relational-mapping/) to the relational database management system. In this way, simulations with this toggle box ticked are running JAS-mine 'lite' – a lighter version without any of the database machinery. This may be useful if, for example, the user has no need of input or output databases in their simulation, and they want a way of reducing the memory requirements of their simulation and to potentially increase the speed of execution. Note that an exception will be thrown if a model requiring data from an input database is attempted to be built whilst the 'Turn off database' toggle box is ticked. + +The sliding scale on the right labelled **'Simulation speed'** adjusts the real-time speed in which the simulation is executed. The default speed is set to the maximum (and so is only limited by the processor speed of the computer on which the simulation is running), however the simulation can be slowed down by dragging the slider to the left – this may be useful for example when demonstrating a model to an audience when it is desired to slow down the updates of the graphs. + +## 1.3 Parameter Boxes + +A JAS-mine model's *[GUI parameters](https://www.microsimulation.ac.uk/jas-mine/resources/cookbook/gui-parameters/)* appear in the parameter boxes below the Simulation Control Pane. One parameter box for each of the '[Model-Collector-Observer](https://www.microsimulation.ac.uk/jas-mine/resources/focus/model-collector-observer/)' manager classes is displayed, as long as there are any variables in each of the manager classes that have the `@GUIparameter` annotation. 
+ +![JAS-mine GUI Parameters](https://raw.githubusercontent.com/centreformicrosimulation/SimPaths/develop/documentation/figures/GUI/SimPaths%20parameters.png) + +The description of a GUI parameter can be observed by hovering the mouse pointer over the value, upon which a yellow box containing the description appears if it has been defined as an attribute in the `@GUIparameter` annotation where the variable is declared, e.g.: +```java +@GUIparameter(description = "Country to be simulated") +private Country country = Country.IT; +``` + +The type of parameters determines the way they are presented in the boxes, with boxes to hold numerical values, tick boxes for Boolean 'toggle' variables, and drop down menus enumerating categories. In the figure above, the Country drop down menu appears after clicking on the value to the right of the Country label (Country is an Enum variable that can hold one of a finite set of values). The default values in the parameter boxes are the values hard-coded to the GUI parameters in the manager classes. If the user wants to change the default values of the GUI parameters, this must be done in the code. + +The GUI parameters can be adjusted from their default values before the model is built, or even during the execution of the simulation, although in this latter case the 'Update parameters in the live simulation' button in the Simulation Control Pane must be clicked for any parameters in the simulation to be updated. This is useful, for example, in seeing the impact of step changes in the parameters on the equilibrium state of a simulation model. Note that only parameters that are accessed by the model during the simulation after the update button has been clicked can have any impact on the simulation. 
For example, if a simulation uses a GUI parameter to determine the size of an agent population at the start of the simulation, and the population is subsequently evolved, the population size will not change despite the population size parameter having been updated if this parameter is only ever used by the model at the start of the simulation. In order to have a population size parameter that affects population size during the simulation, the model developer would need to explicitly code the simulation to check the size of the population at scheduled times during the simulation, and remove / add agents if the population size differs from the population size parameter. + +## 1.4 Graphical Widgets (Charts) + +Below the parameter boxes in the main pane with the blue background, a variety of graphics can be produced in the JAS-mine GUI, including time-series plots, histograms and geographical maps. For information on the currently supported graphics, see the JAS-mine GUI's Plot, Colormap and Space packages in the [API](https://www.microsimulation.ac.uk/jas-mine/resources/api/) documentation; for how to feed the graphical widgets, see the JAS-mine [statistical package](https://www.microsimulation.ac.uk/jas-mine/resources/tutorials/how-to-use-the-jasmine-statistical-package/). + +The graphics do not immediately appear in the GUI when the JAS-mine project's Start class is executed; the project must be built first by clicking on the 'Build simulation model' button in the Simulation Control Pane. + +The settings of a graphical widget can be adjusted by right clicking on it with the mouse pointer, and selecting the appropriate controls that are available for the type of widget. 
For example, the labels, line-type, colour and appearance of time series plots can be altered while running the simulation as shown below: + +![JAS-mine Chart Properties](https://raw.githubusercontent.com/centreformicrosimulation/SimPaths/develop/documentation/figures/GUI/Chart%20Properties.png) + +In addition, for a time series plot, it is possible to zoom in to areas of data points by left-clicking and dragging the mouse pointer diagonally downwards and to the right in order to select a rectangle of area to enlarge. The left hand side of the figure below shows the rectangle created by dragging the mouse pointer (the mouse pointer is not shown), and the right hand side is the resulting enlarged chart. The user can zoom out again either by dragging the mouse pointer upwards or leftwards, or by right clicking and selecting 'Auto Range -> Both Axes' from the list of options. + +![JAS-mine Chart Zoom](https://raw.githubusercontent.com/centreformicrosimulation/SimPaths/develop/documentation/figures/GUI/SimPaths-Chart-Zoom.png) + +Finally, the time series plots can be saved as a PNG file, printed or copied by right clicking on the chart and selecting the relevant option. + +## 1.5 Output stream + +The output stream is the white coloured window at the bottom of the GUI. It contains the system and debugger out-stream data that would be printed out to the Command Prompt (in Windows), the Terminal (Linux), or in Eclipse if running in batch mode without the GUI. Such output includes any data produced by `System.out.println()` or `System.err.println()` commands in Java, and also information about the creation of database tables when building the project. The stack trace of any exceptions thrown will be printed out. The buttons on top of the output stream window include an option to save the text to file. 
+ +![JAS-mine Output Stream](https://raw.githubusercontent.com/centreformicrosimulation/SimPaths/develop/documentation/figures/GUI/Output%20stream.png) \ No newline at end of file diff --git a/documentation/wiki/user-guide/index.md b/documentation/wiki/user-guide/index.md new file mode 100644 index 000000000..2b0ea4b7d --- /dev/null +++ b/documentation/wiki/user-guide/index.md @@ -0,0 +1,3 @@ +# User Guide + +The guide below describes how SimPaths can be used to run simulations from the GUI (Graphical User Interface) and create scenarios by changing the GUI parameters, the input data, or the tax-benefit parameters. \ No newline at end of file diff --git a/documentation/wiki/user-guide/modifying-parameters.md b/documentation/wiki/user-guide/modifying-parameters.md new file mode 100644 index 000000000..c8af4ac8b --- /dev/null +++ b/documentation/wiki/user-guide/modifying-parameters.md @@ -0,0 +1,5 @@ +# Modifying SimPaths Parameters + +!!! warning "In progress" + This page is under development. Contributions welcome — + see the [User Guide](index.md) for other resources. diff --git a/documentation/wiki/user-guide/multiple-runs.md b/documentation/wiki/user-guide/multiple-runs.md new file mode 100644 index 000000000..4a4726d69 --- /dev/null +++ b/documentation/wiki/user-guide/multiple-runs.md @@ -0,0 +1,3 @@ +# Multiple Runs + +To run the model multiple times with different parameters, follow the instructions in the Developers guide. \ No newline at end of file diff --git a/documentation/wiki/user-guide/single-runs.md b/documentation/wiki/user-guide/single-runs.md new file mode 100644 index 000000000..46456b873 --- /dev/null +++ b/documentation/wiki/user-guide/single-runs.md @@ -0,0 +1,15 @@ +# Single Runs + +1. Install the [SimPaths executable](https://github.com/centreformicrosimulation/SimPaths/releases) +2. [Request access to the data](https://github.com/centreformicrosimulation/SimPaths/wiki/2.Input-data). +3. Run the app. 
In the pop-up menu, select option number 4 and click "Next." + + ![image](https://github.com/centreformicrosimulation/SimPaths/assets/56582427/c9042177-3fa1-472c-bfe5-b699449810d4) + +4. Allow a few minutes for the database to be built. +5. You should now see the GUI. + + ![image](https://github.com/centreformicrosimulation/SimPaths/assets/56582427/0ef65dad-5969-4b5e-9ada-bbb968dd380b) + +6. Click the "Build simulation model" button, and when the process finishes, click the "Start simulation" button. +7. You have now successfully run the simulation. Analyse real-time output and modify model parameters if desired. diff --git a/documentation/wiki/user-guide/tax-benefit-parameters.md b/documentation/wiki/user-guide/tax-benefit-parameters.md new file mode 100644 index 000000000..8ae5817c0 --- /dev/null +++ b/documentation/wiki/user-guide/tax-benefit-parameters.md @@ -0,0 +1,5 @@ +# Modifying Tax-Benefit Parameters + +!!! warning "In progress" + This page is under development. Contributions welcome — + see the [User Guide](index.md) for other resources. diff --git a/documentation/wiki/user-guide/uncertainty-analysis.md b/documentation/wiki/user-guide/uncertainty-analysis.md new file mode 100644 index 000000000..80a8c3d08 --- /dev/null +++ b/documentation/wiki/user-guide/uncertainty-analysis.md @@ -0,0 +1,18 @@ +# Uncertainty Analysis + +Uncertainty regarding a model's projections can arise from a variety of reasons (Bilcke et al., 2011; Creedy et al., 2007). 
In particular, sources of uncertainty are generally distinguished in **(i) input data**, for instance due to sampling errors in the initial population, **(ii) model structure**, that is the validity of the general modelling approach used (also called "methodological uncertainty"), **(iii) model specification**, which concerns the choice of the covariates and the functional forms used, and in particular the crucial assumption that any regularity observed in the data will not break up in the future, **(iv) model parameters**, pointing to the imprecision of the estimates and/or externally provided parameters, and finally **(v) Montecarlo variation** of the model output, which originates from the fact that the simulated aggregate quantities are also imprecise estimates of the theoretical aggregate quantities that the model implicitly defines. None of the above sources of uncertainty is generally considered in microsimulation studies, although this is recognised and criticised (see for instance Goedemé et al, 2013). However, "the calculation of confidence intervals around model results that account for all sources of error remains a major challenge" (Mitton et al., 2000). + +Generally speaking, source (i) should be limited, due to the use of appropriate input data and sampling weights. Sources (ii)-(iii) are often left unexplored, by making the common assumption that the model is well specified (measures of fit should be reported for each estimated equation to corroborate this hypothesis). Montecarlo variation of the model outcome (source v) can be brought down to negligible by appropriately scaling up simulated population size. The remaining source of uncertainty that needs to be addressed is therefore **parameters uncertainty**, stemming from sampling errors in estimation (source iv). + +There are two approaches that can be used to deal with this uncertainty (Creedy et al., 2007). 
The first is what we might label "brute force", and prescribes to bootstrap the coefficients of the estimated equations from their estimated distribution (e.g. multivariate normal in case of multinomial probit regressions) with mean equal to the point estimate, and variance-covariance matrix equal to the estimated variance-covariance. Bootstrapping needs to be performed only once, at the beginning of the simulation: the entire simulation is then performed with the bootstrapped values of the coefficients. The second approach provides an approximation by assuming from the outset a normal distribution for the resulting confidence intervals, requiring many fewer draws from the parameter distribution. + +JAS-mine allows for a simple implementation of the "brute-force" approach, by exploiting the bootstrapping feature of its Regression library within a multi-run implementation (see the FOCUS section): the simulation is run many times (e.g. 1,000 times), each using a different set of coefficients. The result is a distribution of model outcomes, around the central projections obtained with the estimated coefficients. + +![Uncertainty Analysis Graph](https://www.microsimulation.ac.uk/wp-content/uploads/documentation/active_F_20_64_IE.png) + +**References:** + +* Bilcke J, Beutels P, Brisson M, Jit M (2011). Accounting for Methodological, Structural, and Parameter Uncertainty in Decision-Analytic Models: A Practical Guide. Medical Decision Making 31(4): 675-692. +* Creedy J, Kalb G, Kew H (2007). Confidence intervals for policy reforms in behavioural tax microsimulation modelling. Bulletin of Economic Research 59(1): 37-65. +* Goedemé T, Van den Bosch K, Salanauskaite L, Verbist G (2013) Testing the Statistical Significance of Microsimulation Results: A Plea. International Journal of Microsimulation 6(3): 50-77. +* Mitton L, Sutherland H, Weeks M (2000). Microsimulation Modelling for Policy Analysis. Challenges and Innovations. Cambridge University Press, Cambridge, UK. 
diff --git a/documentation/wiki/validation/index.md b/documentation/wiki/validation/index.md new file mode 100644 index 000000000..1ba2c9937 --- /dev/null +++ b/documentation/wiki/validation/index.md @@ -0,0 +1,126 @@ +# Model Validation + +## 1. Introduction + +This section explains the current procedures implemented to validate SimPaths' inputs and outputs. + +Validation is a key step in assessing the accuracy and consistency of the model. By comparing both the model inputs and the simulated outputs to external survey data, we can identify potential issues with the model specification, implementation, or underlying data. Validation should be performed after any major update to the model inputs or adjustments to the *SimPaths* code. + +At present, validation is organised into three main steps: + +1. **Validating regression estimates** + This step assesses the performance of the regression models that govern key behavioural and demographic processes within SimPaths (e.g. leaving the parental home, returning to education). Using the estimated regression coefficients, we generate predicted values for each observation in the estimation sample, compute/plot aggregate statistics, and compare these with the equivalent values from the estimation sample. This provides a straightforward check that the estimated relationships embedded in the model are consistent with observed empirical patterns. + +2. **Validating simulated output** + The second step examines the simulated output produced by SimPaths. The model is run for a period in which comparable survey data are available (2011–2023), and aggregate measures from the simulated data are compared to benchmarks computed using data from the UK Household Longitudinal Study (UKHLS). The validation focuses on the model's ability to reproduce **aggregate measures over time** (time-series consistency) and **distributions within years**, rather than the accuracy of individual trajectories through time. + +3. 
**Validating regression estimates from simulated output** + *To be completed.* + + +## 2. Obtaining the validation scripts + +Validation procedures are currently executed in **Stata**. The corresponding do-files are located in the *validation* subfolder on the `develop` branch of the *SimPaths* GitHub repository. + +You can access these files in one of three ways: + +1. **Clone the repository** – recommended for developers who want the full version history or plan to contribute changes. See Section Working in Github - Introduction. + +2. **Download the repository as a ZIP file** – provides a snapshot of all files on the selected branch. + - In the GitHub interface, select the `develop` branch, click the green **Code** button, and choose **Download ZIP**. + - Extract the ZIP file locally and navigate to the *validation* folder. + +3. **Download individual files directly from GitHub** – suitable if you only need a few specific scripts. + - Navigate to the desired file in the repository (e.g. *validation/01_estimate_validation/00_master.do*). + - Click the **Download raw file** icon (the downward arrow) on the top right of the file viewer to save it locally. + +Each method gives you the same file contents; the difference is whether you download just one file, a snapshot of the branch, or the entire version-controlled repository. + + +## 3. Running the validation scripts + +Once you have obtained the relevant validation files, the next step is to run them in **Stata**. +This section explains how to set up your working environment, what data are required, and how to execute the validation do-files for each stage of validation. + +### 3.1 Validating regression estimates + +These do-files are contained in the subfolder *01_estimate_validation*. +Before running these scripts, four preparatory steps are required: + +- **a. Run the regression estimation do-files** + The validation do-files require datasets produced during the regression estimation stage. 
+ Ensure that the estimation do-files have been run and that the output data are available before proceeding. + +- **b. Set up the file structure** + Place the downloaded do-files in an **estimate validation** folder in a subfolder called *do_files*. + Create additional subfolders *data* and *graphs*. + Within the *graphs* subfolder, create the following subfolders: + - `education` + - `fertility` + - `health` + - `home_ownership` + - `income` + - `leave_parental_home` + - `partnership` + - `retirement` + - `wages` + +- **c. Check the location of the input data files** + Place the necessary data files in the *data* subfolder. + These will contain “sample” in their title (e.g. *E1_sample*) and are produced in the regression estimation do-files. + +- **d. Update directory paths in *00_master.do*** + Before running the validation do-files, set up the file directories. + Open *00_master.do* and update the global file paths as necessary. + If the file structure is set up as above, only the global *dir_work* needs to be changed to correspond to the main folder for estimate validation. + +Once these steps have been completed, you can straightforwardly run the do-files to produce the validation plots. + + +### 3.2 Validating the simulated output + +The do-files for validating the simulated output are contained in the subfolder *02_simulation_validation*. +These should be run **after executing *SimPaths***, as they rely on a number of *.csv* files produced by the model. + +Before running these scripts, complete the following preparatory steps: + +- **a. Obtain simulated output from *SimPaths*** + Ensure that the most recent simulated output is available. See [Section 2](https://github.com/centreformicrosimulation/SimPaths/wiki/2.-Running-SimPaths) to run SimPaths and obtain the simulation outputs. + +- **b. Set up the file structure** + Place the downloaded do-files in a **simulation validation** folder in a subfolder called *do_files*. 
+ Also create additional subfolders: *data* and *graphs*. + Within the *graphs* subfolder, create the following subfolders: + - `care` + - `children` + - `disability` + - `economic_activity` + - `education` + - `health` + - `hours_worked` + - `income/capital_income` + - `income/disposable_income` + - `income/equivalized_disposable_income` + - `income/gross_income` + - `income/gross_labour_income` + - `income/pension_income` + - `inequality` + - `partnership` + - `poverty` + - `wages` + + Each of these subfolders will contain the relevant validation plots produced by the corresponding do-files. + +- **c. Deposit input data** + Place the simulated output *.csv* files titled `Person`, `BenefitUnit` and `Household` in the *data* subfolder. + You will also need the Understanding Society survey data to compare against the simulated output. + For this purpose, we currently use the following initial population files: + - *ukhls_pooled_all_obs_01* + - *ukhls_pooled_all_obs_09* + +- **d. Update directory paths in *00_master.do*** + Open *00_master.do* in the *do_files* subfolder and update the global paths. + In the “Define directories” section, update the global *dir_path* to point to the location of the main simulation validation folder. + Run the file up to (but not including) the “Run do files” section to set directories and parameters (adjust as necessary). + +Once these steps have been completed, you can straightforwardly run the do-files to produce the validation plots. \ No newline at end of file diff --git a/pom.xml b/pom.xml index d85b0752c..42cd38e3d 100644 --- a/pom.xml +++ b/pom.xml @@ -135,6 +135,18 @@ com.github.jasmineRepo JAS-mine-gui 4.2.2 + + + xml-apis + xmlParserAPIs + + + + + + xml-apis + xml-apis + 1.4.01