diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b0d1ae --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +_build +.DS_Store \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e2c2663 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,13 @@ +{ + "python.pythonPath": "env/bin/python3", + "python.formatting.provider": "black", + "python.languageServer": "Pylance", + "files.exclude": { + "**/.git": true, + "**/.svn": true, + "**/.hg": true, + "**/CVS": true, + "**/.DS_Store": true, + "**/__pycache__": true + }, +} \ No newline at end of file diff --git a/setup.py b/setup.py index 576acce..dbfe972 100644 --- a/setup.py +++ b/setup.py @@ -3,31 +3,24 @@ setup( name="symbulate", version="0.5.5", - description="A symbolic algebra for specifying simulations.", - url="https://github.com/dlsun/symbulate", - author="Dennis Sun", author_email="dsun09@calpoly.edu", - license="GPLv3", - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Education', - 'Topic :: Scientific/Engineering :: Mathematics', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Programming Language :: Python :: 3', + "Development Status :: 3 - Alpha", + "Intended Audience :: Education", + "Topic :: Scientific/Engineering :: Mathematics", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Programming Language :: Python :: 3", ], - - keywords='probability simulation', - + keywords="probability simulation", packages=find_packages(), - install_requires=[ - 'numpy', - 'scipy', - 'matplotlib' - ] + "numpy", + "scipy", + "matplotlib", + "rich", + ], ) diff --git a/symbulate/__init__.py b/symbulate/__init__.py index 4df53de..4ab746e 100644 --- a/symbulate/__init__.py +++ b/symbulate/__init__.py @@ -1,5 +1,5 @@ -from .probability_space import ProbabilitySpace, BoxModel, DeckOfCards -from .random_variables import RV +from .probability_space import ProbabilitySpace, BoxModel, DeckOfCards, Dice, Coin +from .random_variables import RV, CoinRV from .random_processes import RandomProcess from .distributions import ( Bernoulli, @@ -24,38 +24,30 @@ Rayleigh, MultivariateNormal, BivariateNormal, - Multinomial + Multinomial, ) from .independence import AssumeIndependent -from .index_sets import ( - Naturals, - Integers, - Reals, - DiscreteTimeSequence -) +from .index_sets import Naturals, Integers, Reals, DiscreteTimeSequence from .result import ( Scalar, Vector, InfiniteVector, DiscreteTimeFunction, ContinuousTimeFunction, - concat + concat, ) from .gaussian_process import ( GaussianProcess, GaussianProcessProbabilitySpace, BrownianMotion, - BrownianMotionProbabilitySpace -) -from .poisson_process import ( - PoissonProcess, - PoissonProcessProbabilitySpace + BrownianMotionProbabilitySpace, ) +from .poisson_process import PoissonProcess, PoissonProcessProbabilitySpace from .markov_chains import ( MarkovChain, MarkovChainProbabilitySpace, ContinuousTimeMarkovChain, - ContinuousTimeMarkovChainProbabilitySpace + ContinuousTimeMarkovChainProbabilitySpace, ) from .plot import figure, xlabel, ylabel, xlim, ylim, plot from .math import * diff --git a/symbulate/base.py b/symbulate/base.py index eeb8d95..88bdc5d 100644 --- a/symbulate/base.py +++ b/symbulate/base.py @@ -3,6 +3,7 @@ import numpy as np import scipy.stats as stats + class Arithmetic: """A class with operations such as +, -, *, /. @@ -107,7 +108,7 @@ def __ge__(self, other): class Statistical: """A class with statistical functions, such as mean, var, etc. - Subclasses must implement the _statistic_factory and + Subclasses must implement the _statistic_factory and _multivariate_statistic_factory methods, which specify how (univariate) statistics (e.g., mean and variance), as well as multivariate statistics (e.g., covariance and correlation) @@ -172,7 +173,7 @@ def iqr(self): Returns: The interquartile range. """ - return self.quantile(.75) - self.quantile(.25) + return self.quantile(0.75) - self.quantile(0.25) def median(self): r"""Calculate the median. @@ -295,7 +296,7 @@ def cov(self): The covariance is a measure of the relationship between two variables. The sign of the covariance indicates the direction of the relationship. - .. math:: + .. math:: \sigma_{XY} = \frac{1}{n} \sum_{i=1}^n (x_i - \mu_X) (y_i - \mu_Y) @@ -314,7 +315,7 @@ def corr(self): The correlation is the covariance normalized by the standard deviations. - .. math:: + .. math:: \rho_{XY} = \frac{1}{n} \sum_{i=1}^n \frac{x_i - \mu_X}{\sigma_X} \frac{y_i - \mu_Y}{\sigma_Y} @@ -331,12 +332,12 @@ def corr(self): def corrcoef(self): r"""An alias for .corr()""" return self.corr() - + class Logical: """A class that supports logical operations: and, or, and not. - Subclasses must implement the _logical_factory method, which + Subclasses must implement the _logical_factory method, which specifies how the logical operator operates on two objects of that type. """ @@ -353,12 +354,12 @@ def __invert__(self): op_func = self._logical_factory(lambda x: not x) return op_func(self) - + class Filterable: """A class with filtering and counting methods. - Subclasses must implement the filter method, which specifies how to - construct a new instance containing only those elements that satisfy + Subclasses must implement the filter method, which specifies how to + construct a new instance containing only those elements that satisfy a given criterion. """ @@ -367,7 +368,7 @@ def filter_eq(self, value): Args: value: A value of the same type as the elements in the object. - + Returns: All of the elements that were equal to value. """ @@ -378,7 +379,7 @@ def filter_neq(self, value): Args: value: A value of the same type as the elements in the object. - + Returns: All of the elements that were _not_ equal to value. """ @@ -387,12 +388,12 @@ def filter_neq(self, value): def filter_lt(self, value): """Get all elements less than a particular value. - N.B. lt stands for "less than". For elements that are + N.B. lt stands for "less than". For elements that are less than _or equal to_ the given value, use .filter_leq(value). Args: value: A value of the same type as the elements in the object. - + Returns: All of the elements that were less than value. """ @@ -401,12 +402,12 @@ def filter_lt(self, value): def filter_leq(self, value): """Get all elements less than or equal to a particular value. - N.B. leq stands for "less than or equal to". For elements + N.B. leq stands for "less than or equal to". For elements that are strictly less than the given value, use .filter_lt(value). Args: value: A value of the same type as the elements in the object. - + Returns: All of the elements that were less than _or equal to_ value. """ @@ -415,12 +416,12 @@ def filter_leq(self, value): def filter_gt(self, value): """Get all elements greater than a particular value. - N.B. gt stands for "greater than". For elements that are + N.B. gt stands for "greater than". For elements that are greater than _or equal to_ the given value, use .filter_geq(value). Args: value: A value of the same type as the elements in the object. - + Returns: All of the elements that were greater than value. """ @@ -435,16 +436,15 @@ def filter_geq(self, value): Args: value: A value of the same type as the elements in the object. - + Returns: All of the elements that were greater than _or equal to_ value. """ return self.filter(lambda x: x >= value) - # The following functions return an integer indicating # how many elements passed a given criterion. - + def count(self, func=lambda x: True): """Counts the number of elements satisfying a given criterion. @@ -463,7 +463,7 @@ def count_eq(self, value): Args: value: A value of the same type as the elements in the object. - + Returns: int: The number of elements that were equal to value. """ @@ -474,7 +474,7 @@ def count_neq(self, value): Args: value: A value of the same type as the elements in the object. - + Returns: int: The number of elements that were not equal to value. """ @@ -484,12 +484,12 @@ def count_lt(self, value): """Count the number of elements less than a particular value. N.B. lt stands for "greater than". For the number of elements - that are less than _or equal to_ the given value, use + that are less than _or equal to_ the given value, use .count_leq(value). Args: value: A value of the same type as the elements in the object. - + Returns: int: The number of elements that were less than value. """ @@ -499,12 +499,12 @@ def count_leq(self, value): """Count the number of elements less than or equal to a particular value. N.B. leq stands for "less than or equal to". For the number of - elements that are strictly greater than the given value, use + elements that are strictly greater than the given value, use .count_lt(value). Args: value: A value of the same type as the elements in the object. - + Returns: int: The number of elements that were less than _or equal to_ value. """ @@ -514,12 +514,12 @@ def count_gt(self, value): """Count the number of elements greater than a particular value. N.B. gt stands for "greater than". For the number of elements - that are greater than _or equal to_ the given value, use + that are greater than _or equal to_ the given value, use .count_geq(value). Args: value: A value of the same type as the elements in the object. - + Returns: int: The number of elements that were greater than value. """ @@ -529,12 +529,12 @@ def count_geq(self, value): """Count the number of elements greater than or equal to a particular value. N.B. geq stands for "greater than or equal to". For the number of - elements that are strictly greater than the given value, use + elements that are strictly greater than the given value, use .count_gt(value). Args: value: A value of the same type as the elements in the object. - + Returns: int: The number of elements that were greater than _or equal to_ value. """ @@ -544,7 +544,7 @@ def count_geq(self, value): class Transformable: """A class that supports transformations. - Subclasses must implement the apply method, which specifies how to + Subclasses must implement the apply method, which specifies how to apply a function to the object. """ @@ -553,11 +553,9 @@ def __abs__(self): def __round__(self): return self.apply(round) - + def __floor__(self): return self.apply(math.floor) def __ceil__(self): return self.apply(math.ceil) - - diff --git a/symbulate/distributions.py b/symbulate/distributions.py index b5670d7..1c3add2 100644 --- a/symbulate/distributions.py +++ b/symbulate/distributions.py @@ -7,6 +7,7 @@ from .plot import get_next_color from .result import Scalar, Vector, InfiniteVector + class Distribution(ProbabilitySpace): def __init__(self, params, scipy, discrete=True): self.params = params @@ -15,7 +16,7 @@ def __init__(self, params, scipy, discrete=True): if discrete: self.pmf = lambda x: scipy.pmf(x, **self.params) - self.pdf = self.pmf # add pdf as an alias for pmf + self.pdf = self.pmf # add pdf as an alias for pmf else: self.pdf = lambda x: scipy.pdf(x, **self.params) @@ -28,10 +29,7 @@ def __init__(self, params, scipy, discrete=True): self.sd = lambda: scipy.std(**self.params) self.sim_func = scipy.rvs - self.xlim = ( - scipy.ppf(0.001, **self.params), - scipy.ppf(0.999, **self.params) - ) + self.xlim = (scipy.ppf(0.001, **self.params), scipy.ppf(0.999, **self.params)) def draw(self): return Scalar(self.sim_func(**self.params)) @@ -40,13 +38,18 @@ def draw(self): # of vectorized simulations. def __pow__(self, exponent): if exponent == float("inf"): + def draw(): def _func(_): return self.sim_func(**self.params) + return InfiniteVector(_func) + else: + def draw(): return Vector(self.sim_func(**self.params, size=exponent)) + return ProbabilitySpace(draw) def plot(self, xlim=None, alpha=None, ax=None, **kwargs): @@ -65,7 +68,7 @@ def plot(self, xlim=None, alpha=None, ax=None, **kwargs): # determine limits for y-axes based on y values ymin, ymax = ys[np.isfinite(ys)].min(), ys[np.isfinite(ys)].max() ylim = min(0, ymin - 0.05 * (ymax - ymin)), 1.05 * ymax - + # get the current axis if they exist and no axis is specified fig = plt.gcf() if ax is None and fig.axes: @@ -78,12 +81,12 @@ def plot(self, xlim=None, alpha=None, ax=None, **kwargs): ylower, yupper = ax.get_ylim() ylim = min(ylim[0], ylower), max(ylim[1], yupper) else: - ax = plt.gca() # creates new axis + ax = plt.gca() # creates new axis # set the axis limits ax.set_xlim(*xlim) ax.set_ylim(*ylim) - + # get next color in cycle color = get_next_color(ax) @@ -100,6 +103,7 @@ def plot(self, xlim=None, alpha=None, ax=None, **kwargs): ## Discrete Distributions + class Bernoulli(Distribution): """Defines a probability space for a Bernoulli distribution. @@ -115,11 +119,12 @@ def __init__(self, p): else: raise Exception("p must be between 0 and 1") - params = { - "p" : p - } + params = {"p": p} super().__init__(params, stats.bernoulli, True) - self.xlim = (0, 1) # Bernoulli distributions are not defined for x < 0 and x > 1 + self.xlim = ( + 0, + 1, + ) # Bernoulli distributions are not defined for x < 0 and x > 1 class Binomial(Distribution): @@ -136,9 +141,9 @@ def __init__(self, n, p): if n >= 0 and isinstance(n, numbers.Integral): self.n = n - #elif n == 0: - #raise NotImplementedError - #TODO + # elif n == 0: + # raise NotImplementedError + # TODO else: raise Exception("n must be a non-negative integer") @@ -147,12 +152,9 @@ def __init__(self, n, p): else: raise Exception("p must be between 0 and 1") - params = { - "n" : n, - "p" : p - } + params = {"n": n, "p": p} super().__init__(params, stats.binom, True) - self.xlim = (0, n) # Binomial distributions are not defined for x < 0 and x > n + self.xlim = (0, n) # Binomial distributions are not defined for x < 0 and x > n class Hypergeometric(Distribution): @@ -185,17 +187,16 @@ def __init__(self, n, N0, N1): else: raise Exception("N1 must be a non-negative integer") - params = { - "M" : N0 + N1, - "n" : N1, - "N" : n - } + params = {"M": N0 + N1, "n": N1, "N": n} if N0 + N1 < n: raise Exception("N0 + N1 cannot be less than the sample size n") super().__init__(params, stats.hypergeom, True) - self.xlim = (0, n) # Hypergeometric distributions are not defined for x < 0 and x > n + self.xlim = ( + 0, + n, + ) # Hypergeometric distributions are not defined for x < 0 and x > n class Geometric(Distribution): @@ -216,11 +217,12 @@ def __init__(self, p): else: raise Exception("p must be between 0 and 1") - params = { - "p" : p - } + params = {"p": p} super().__init__(params, stats.geom, True) - self.xlim = (1, self.xlim[1]) # Geometric distributions are not defined for x < 1 + self.xlim = ( + 1, + self.xlim[1], + ) # Geometric distributions are not defined for x < 1 class NegativeBinomial(Distribution): @@ -247,17 +249,16 @@ def __init__(self, r, p): else: raise Exception("p must be between 0 and 1") - params = { - "n" : r, - "p" : p, - "loc" : r - } + params = {"n": r, "p": p, "loc": r} super().__init__(params, stats.nbinom, True) - self.xlim = (r, self.xlim[1]) # Negative Binomial distributions are not defined for x < r + self.xlim = ( + r, + self.xlim[1], + ) # Negative Binomial distributions are not defined for x < r def draw(self): """A function that takes no arguments and - returns a single draw from the Negative Binomial distribution.""" + returns a single draw from the Negative Binomial distribution.""" # Numpy's negative binomial returns numbers in [0, inf), # but we want numbers in [r, inf). @@ -288,12 +289,9 @@ def __init__(self, r, p): else: raise Exception("p must be between 0 and 1") - params = { - "n" : r, - "p" : p - } + params = {"n": r, "p": p} super().__init__(params, stats.nbinom, True) - self.xlim = (0, self.xlim[1]) # Pascal distributions are not defined for x < 0 + self.xlim = (0, self.xlim[1]) # Pascal distributions are not defined for x < 0 class Poisson(Distribution): @@ -310,11 +308,9 @@ def __init__(self, lam): else: raise Exception("Lambda (lam) must be greater than 0") - params = { - "mu" : lam - } + params = {"mu": lam} super().__init__(params, stats.poisson, True) - self.xlim = (0, self.xlim[1]) # Poisson distributions are not defined for x < 0 + self.xlim = (0, self.xlim[1]) # Poisson distributions are not defined for x < 0 class DiscreteUniform(Distribution): @@ -329,20 +325,18 @@ def __init__(self, a=0, b=1): self.a = a self.b = b + 1 - params = { - "low" : self.a, - "high" : self.b - } + params = {"low": self.a, "high": self.b} if a >= b: raise Exception("b cannot be less than or equal to a") super().__init__(params, stats.randint, True) - self.xlim = (a, b) # Uniform distributions are not defined for x < a and x > b + self.xlim = (a, b) # Uniform distributions are not defined for x < a and x > b ## Continuous Distributions + class Uniform(Distribution): """Defines a probability space for a uniform distribution. @@ -355,16 +349,13 @@ def __init__(self, a=0.0, b=1.0): self.a = a self.b = b - params = { - "loc" : a, - "scale" : b - a - } + params = {"loc": a, "scale": b - a} if a > b: raise Exception("b cannot be less than a") super().__init__(params, stats.uniform, False) - self.xlim = (a, b) # Uniform distributions are not defined for x < a and x > b + self.xlim = (a, b) # Uniform distributions are not defined for x < a and x > b class Normal(Distribution): @@ -377,18 +368,19 @@ class Normal(Distribution): var (float): variance parameter of the normal distribution (if specified, var parameter will be ignored) """ - #TODO edit docstring for Normal Distribution + + # TODO edit docstring for Normal Distribution def __init__(self, mean=0.0, sd=1.0, var=None): - #Note: cleaner way to implement this + # Note: cleaner way to implement this if var is None: if sd > 0: self.scale = sd elif sd == 0: raise NotImplementedError - #TODO + # TODO else: raise Exception("sd cannot be less than 0") @@ -397,14 +389,11 @@ def __init__(self, mean=0.0, sd=1.0, var=None): self.scale = np.sqrt(var) elif var == 0: raise NotImplementedError - #TODO + # TODO else: raise Exception("var cannot be less than 0") - params = { - "loc" : mean, - "scale" : self.scale - } + params = {"loc": mean, "scale": self.scale} super().__init__(params, stats.norm, False) @@ -434,11 +423,12 @@ def __init__(self, rate=1.0, scale=None): else: raise Exception("scale must be positive") - params = { - "scale" : 1. / rate if scale is None else scale - } + params = {"scale": 1.0 / rate if scale is None else scale} super().__init__(params, stats.expon, False) - self.xlim = (0, self.xlim[1]) # Exponential distributions are not defined for x < 0 + self.xlim = ( + 0, + self.xlim[1], + ) # Exponential distributions are not defined for x < 0 class Gamma(Distribution): @@ -474,12 +464,9 @@ def __init__(self, shape, rate=1.0, scale=None): else: raise Exception("scale must be positive") - params = { - "a" : shape, - "scale" : 1. / rate if scale is None else scale - } + params = {"a": shape, "scale": 1.0 / rate if scale is None else scale} super().__init__(params, stats.gamma, False) - self.xlim = (0, self.xlim[1]) # Gamma distributions are not defined for x < 0 + self.xlim = (0, self.xlim[1]) # Gamma distributions are not defined for x < 0 class Beta(Distribution): @@ -502,12 +489,9 @@ def __init__(self, a, b): else: raise Exception("b must be positive") - params = { - "a" : a, - "b" : b - } + params = {"a": a, "b": b} super().__init__(params, stats.beta, False) - self.xlim = (0, 1) # Beta distributions are not defined for x < 0 and x > 1 + self.xlim = (0, 1) # Beta distributions are not defined for x < 0 and x > 1 class StudentT(Distribution): @@ -523,14 +507,12 @@ def __init__(self, df): else: raise Exception("df must be greater than 0") - params = { - "df" : df - } + params = {"df": df} super().__init__(params, stats.t, False) if df == 1: - self.mean = lambda: float('nan') - self.sd = lambda: float('nan') - self.var = lambda: float('nan') + self.mean = lambda: float("nan") + self.sd = lambda: float("nan") + self.var = lambda: float("nan") class ChiSquare(Distribution): @@ -546,11 +528,12 @@ def __init__(self, df): else: raise Exception("df must be a positive integer") - params = { - "df" : df - } + params = {"df": df} super().__init__(params, stats.chi2, False) - self.xlim = (0, self.xlim[1]) # Chi-Square distributions are not defined for x < 0 + self.xlim = ( + 0, + self.xlim[1], + ) # Chi-Square distributions are not defined for x < 0 class F(Distribution): @@ -573,12 +556,9 @@ def __init__(self, dfN, dfD): else: raise Exception("dfD must be greater than 0") - params = { - "dfn" : dfN, - "dfd" : dfD - } + params = {"dfn": dfN, "dfd": dfD} super().__init__(params, stats.f, False) - self.xlim = (0, self.xlim[1]) # F distributions are not defined for x < 0 + self.xlim = (0, self.xlim[1]) # F distributions are not defined for x < 0 class Cauchy(Distribution): @@ -592,10 +572,7 @@ def __init__(self, loc=0, scale=1): self.loc = loc self.scale = scale - params = { - "loc" : loc, - "scale" : scale - } + params = {"loc": loc, "scale": scale} super().__init__(params, stats.cauchy, False) @@ -624,12 +601,12 @@ def __init__(self, mu=0.0, sigma=1.0): else: raise Exception("sigma must be greater than 0") - params = { - "s" : self.s, - "scale" : np.exp(mu) - } + params = {"s": self.s, "scale": np.exp(mu)} super().__init__(params, stats.lognorm, False) - self.xlim = (0, self.xlim[1]) # Log-Normal distributions are not defined for x < 0 + self.xlim = ( + 0, + self.xlim[1], + ) # Log-Normal distributions are not defined for x < 0 class Pareto(Distribution): @@ -653,17 +630,17 @@ def __init__(self, b=1.0, scale=1.0): else: raise Exception("scale must be greater than 0") - params = { - "b" : self.b, - "scale" : self.scale - } + params = {"b": self.b, "scale": self.scale} super().__init__(params, stats.pareto, False) - self.xlim = (scale, self.xlim[1]) # Pareto distributions are not defined for x < scale + self.xlim = ( + scale, + self.xlim[1], + ) # Pareto distributions are not defined for x < scale def draw(self): """A function that takes no arguments and - returns a single draw from the Pareto distribution.""" - + returns a single draw from the Pareto distribution.""" + # Numpy's Pareto is Lomax distribution, or Type II Pareto # but we want the more standard parametrization return self.scale * (1 + np.random.pareto(self.b)) @@ -688,6 +665,7 @@ def __init__(self): ## Multivariate Distributions + class MultivariateNormal(Distribution): """Defines a probability space for a multivariate normal distribution. @@ -699,9 +677,11 @@ class MultivariateNormal(Distribution): def __init__(self, mean, cov): if len(mean) != len(cov): - raise Exception("The dimension of the mean vector" + - " is not compatible with the dimensions" + - " of the covariance matrix.") + raise Exception( + "The dimension of the mean vector" + + " is not compatible with the dimensions" + + " of the covariance matrix." + ) if len(mean) >= 1: self.mean = mean @@ -710,10 +690,14 @@ def __init__(self, mean, cov): if len(cov) >= 1: if all(len(row) == len(mean) for row in cov): - if np.all(np.linalg.eigvals(cov) >= 0) and np.allclose(cov, np.transpose(cov)): + if np.all(np.linalg.eigvals(cov) >= 0) and np.allclose( + cov, np.transpose(cov) + ): self.cov = cov else: - raise Exception("Cov matrix is not symmetric and positive semi-definite") + raise Exception( + "Cov matrix is not symmetric and positive semi-definite" + ) else: raise Exception("Cov matrix is not square") else: @@ -730,21 +714,25 @@ def plot(self): def draw(self): """A function that takes no arguments and - returns a single draw from the Multivariate Normal distribution.""" + returns a single draw from the Multivariate Normal distribution.""" return Vector(np.random.multivariate_normal(self.mean, self.cov)) def __pow__(self, exponent): if exponent == float("inf"): + def draw(): def _func(n): return self.draw() + return InfiniteVector(_func) + else: + def draw(): return Vector(self.draw() for _ in range(exponent)) - return ProbabilitySpace(draw) + return ProbabilitySpace(draw) class BivariateNormal(MultivariateNormal): @@ -765,14 +753,20 @@ class BivariateNormal(MultivariateNormal): (if specified, corr parameter will be ignored) """ - def __init__(self, - mean1=0.0, mean2=0.0, - sd1=1.0, sd2=1.0, corr=0.0, - var1=None, var2=None, cov=None): + def __init__( + self, + mean1=0.0, + mean2=0.0, + sd1=1.0, + sd2=1.0, + corr=0.0, + var1=None, + var2=None, + cov=None, + ): if not -1 <= corr <= 1: - raise Exception("Correlation must be " - "between -1 and 1.") + raise Exception("Correlation must be " "between -1 and 1.") self.mean = [mean1, mean2] @@ -794,7 +788,6 @@ def __init__(self, self.pdf = lambda x: stats.multivariate_normal(x, self.mean, self.cov) - class Multinomial(Distribution): """Defines a probability space for a multinomial distribution. @@ -807,40 +800,42 @@ class Multinomial(Distribution): def __init__(self, n, p): if n >= 0 and isinstance(n, numbers.Integral): self.n = n - #elif n == 0: - #raise NotImplementedError - #TODO + # elif n == 0: + # raise NotImplementedError + # TODO else: raise Exception("n must be a non-negative integer") if sum(p) == 1 and min(p) >= 0: self.p = p else: - raise Exception("Elements of p must be non-negative" + - " and sum to 1.") + raise Exception("Elements of p must be non-negative" + " and sum to 1.") self.discrete = False self.pdf = lambda x: stats.multinomial(x, n, p) def plot(self): raise Exception( - "Plotting is not currently available for " - "the Multinomial distribution." + "Plotting is not currently available for " "the Multinomial distribution." ) def draw(self): """A function that takes no arguments and - returns a single draw from the multinomial distribution.""" + returns a single draw from the multinomial distribution.""" return Vector(np.random.multinomial(self.n, self.p)) def __pow__(self, exponent): if exponent == float("inf"): + def draw(): def _func(_): return self.draw() + return InfiniteVector(_func) + else: + def draw(): return Vector(self.draw() for _ in range(exponent)) diff --git a/symbulate/gaussian_process.py b/symbulate/gaussian_process.py index fafb192..f3d32d5 100644 --- a/symbulate/gaussian_process.py +++ b/symbulate/gaussian_process.py @@ -1,16 +1,13 @@ import numpy as np -from .index_sets import ( - DiscreteTimeSequence, - Reals -) +from .index_sets import DiscreteTimeSequence, Reals from .probability_space import ProbabilitySpace from .result import ( DiscreteTimeFunction, ContinuousTimeFunction, Vector, is_number, - is_numeric_vector + is_numeric_vector, ) from .random_variables import RV from .random_processes import RandomProcess @@ -27,12 +24,10 @@ def get_gaussian_process_result(mean_func, cov_func, index_set=Reals()): base_class = ContinuousTimeFunction else: raise Exception( - "Index set for Gaussian process must be Reals or " - "DiscreteTimeSequence." + "Index set for Gaussian process must be Reals or " "DiscreteTimeSequence." ) class GaussianProcessResult(base_class): - def __init__(self, mean_func, cov_func): self.mean = np.empty(shape=0) @@ -89,14 +84,11 @@ def _vfunc(ts): for j, t in enumerate(ts): cov22[i, j] = cov_func(s, t) - cond_mean = (mean2 + ( - cov12.T @ - np.linalg.solve(cov11, list(self.observed.values()) - self.mean) - )) - cond_var = (cov22 - ( - cov12.T @ - np.linalg.solve(cov11, cov12) - )) + cond_mean = mean2 + ( + cov12.T + @ np.linalg.solve(cov11, list(self.observed.values()) - self.mean) + ) + cond_var = cov22 - (cov12.T @ np.linalg.solve(cov11, cov12)) # update mean vector and covariance matrix self.mean = np.append(self.mean, mean2) @@ -109,14 +101,14 @@ def _vfunc(ts): for t, v in zip(ts, new_values): self.observed[t] = v values[np.isnan(values)] = new_values - + return values self.vfunc = _vfunc def _func(t): return _vfunc([t])[0] - + super().__init__(func=_func) self.index_set = index_set @@ -124,7 +116,6 @@ def _func(t): class GaussianProcessProbabilitySpace(ProbabilitySpace): - def __init__(self, mean_func, cov_func, index_set=Reals()): """Initialize probability space for a Gaussian process. @@ -136,16 +127,12 @@ def __init__(self, mean_func, cov_func, index_set=Reals()): """ def draw(): - return get_gaussian_process_result( - mean_func, - cov_func, - index_set) + return get_gaussian_process_result(mean_func, cov_func, index_set) super().__init__(draw) class GaussianProcess(RandomProcess, RV): - def __init__(self, mean_func, cov_func, index_set=Reals()): """Initialize Gaussian process. @@ -156,16 +143,13 @@ def __init__(self, mean_func, cov_func, index_set=Reals()): (by default, all real numbers) """ - prob_space = GaussianProcessProbabilitySpace(mean_func, - cov_func, - index_set) + prob_space = GaussianProcessProbabilitySpace(mean_func, cov_func, index_set) RandomProcess.__init__(self, prob_space) RV.__init__(self, prob_space) # Define convenience class for Brownian motion class BrownianMotionProbabilitySpace(GaussianProcessProbabilitySpace): - def __init__(self, drift=0, scale=1): """Initialize probability space for Brownian motion. @@ -175,12 +159,11 @@ def __init__(self, drift=0, scale=1): """ super().__init__( mean_func=lambda t: drift * t, - cov_func=lambda s, t: (scale ** 2) * min(s, t) + cov_func=lambda s, t: (scale ** 2) * min(s, t), ) class BrownianMotion(RandomProcess, RV): - def __init__(self, drift=0, scale=1): """Initialize Brownian motion. @@ -188,8 +171,6 @@ def __init__(self, drift=0, scale=1): drift: drift parameter of Brownian motion scale: scale parameter of Brownian motion """ - prob_space = BrownianMotionProbabilitySpace( - drift=drift, scale=scale - ) + prob_space = BrownianMotionProbabilitySpace(drift=drift, scale=scale) RandomProcess.__init__(self, prob_space) RV.__init__(self, prob_space) diff --git a/symbulate/independence.py b/symbulate/independence.py index 140cd60..0ae2753 100644 --- a/symbulate/independence.py +++ b/symbulate/independence.py @@ -1,6 +1,7 @@ from .probability_space import ProbabilitySpace from .random_variables import RV + def AssumeIndependent(*args): """Make RVs independent. @@ -20,7 +21,8 @@ def AssumeIndependent(*args): raise Exception( "AssumeIndependent(...) can only be " "used with RVs, but you passed in a " - "%s." % type(args[i]).__name__) + "%s." % type(args[i]).__name__ + ) for j in range(i + 1, len(args)): if args[i].prob_space == args[j].prob_space: raise Exception( @@ -28,13 +30,14 @@ def AssumeIndependent(*args): "called on RVs that are initially " "defined on different probability " "spaces." - ) + ) def draw(): outcome = [] for arg in args: outcome.append(arg.prob_space.draw()) return outcome + P = ProbabilitySpace(draw) outputs = [] @@ -42,6 +45,7 @@ def draw(): # i=i forces Python to bind i now def _func(x, func=arg.func, i=i): return func(x[i]) + outputs.append(RV(P, _func)) return tuple(outputs) diff --git a/symbulate/index_sets.py b/symbulate/index_sets.py index 386024b..899fb21 100644 --- a/symbulate/index_sets.py +++ b/symbulate/index_sets.py @@ -2,7 +2,6 @@ class IndexSet(object): - def __init__(self): return @@ -20,7 +19,6 @@ def __eq__(self, other): class Reals(IndexSet): - def __init__(self): return @@ -32,23 +30,19 @@ def __contains__(self, value): class Naturals(IndexSet): - def __init__(self): return def __contains__(self, value): try: - return ( - value >= 0 and - (isinstance(value, numbers.Integral) or - value.is_integer()) + return value >= 0 and ( + isinstance(value, numbers.Integral) or value.is_integer() ) except: return False class DiscreteTimeSequence(IndexSet): - def __init__(self, fs): self.fs = fs @@ -59,12 +53,9 @@ def __contains__(self, value): return float(value * self.fs).is_integer() def __eq__(self, index): - return ( - isinstance(index, DiscreteTimeSequence) and - (self.fs == index.fs) - ) + return isinstance(index, DiscreteTimeSequence) and (self.fs == index.fs) -class Integers(DiscreteTimeSequence): +class Integers(DiscreteTimeSequence): def __init__(self): self.fs = 1 diff --git a/symbulate/markov_chains.py b/symbulate/markov_chains.py index 668c974..646688d 100644 --- a/symbulate/markov_chains.py +++ b/symbulate/markov_chains.py @@ -4,15 +4,12 @@ from .math import inf from .probability_space import ProbabilitySpace from .random_variables import RV -from .result import ( - InfiniteVector, ContinuousTimeFunction, DiscreteValued -) +from .result import InfiniteVector, ContinuousTimeFunction, DiscreteValued EPS = 1e-15 class MarkovChainResult(InfiniteVector, DiscreteValued): - def __init__(self, transition_matrix, initial_dist, state_labels=None): # Check transition matrix for row in transition_matrix: @@ -27,15 +24,18 @@ def __init__(self, transition_matrix, initial_dist, state_labels=None): if m != n: raise Exception("Transition matrix must be square.") if len(initial_dist) != n: - raise Exception("Initial distribution must be a vector whose " - "length matches the dimensions of the " - "transition matrix.") + raise Exception( + "Initial distribution must be a vector whose " + "length matches the dimensions of the " + "transition matrix." + ) self.initial_dist = initial_dist # Process state labels if state_labels is not None: if len(state_labels) != n: - raise Exception("There must be as many state labels as " - "there are states.") + raise Exception( + "There must be as many state labels as " "there are states." + ) self.state_labels = state_labels else: self.state_labels = range(n) @@ -54,8 +54,7 @@ def _func(n): state = self.states[m - 1] for _ in range(m, n + 1): state = np.random.choice( - range(self.n_states), - p=self.transition_matrix[state, :] + range(self.n_states), p=self.transition_matrix[state, :] ) self.states.append(state) else: @@ -69,7 +68,6 @@ def get_states(self): class MarkovChainProbabilitySpace(ProbabilitySpace): - def __init__(self, transition_matrix, initial_dist, state_labels=None): """Initialize probability space for a (discrete-time) Markov chain. @@ -81,15 +79,12 @@ def __init__(self, transition_matrix, initial_dist, state_labels=None): """ def _draw(): - return MarkovChainResult(transition_matrix, - initial_dist, - state_labels) + return MarkovChainResult(transition_matrix, initial_dist, state_labels) super().__init__(_draw) class MarkovChain(RV): - def __init__(self, transition_matrix, initial_dist, state_labels=None): """Initialize a (discrete-time) Markov chain. @@ -101,18 +96,13 @@ def __init__(self, transition_matrix, initial_dist, state_labels=None): """ prob_space = MarkovChainProbabilitySpace( - transition_matrix, - initial_dist, - state_labels) + transition_matrix, initial_dist, state_labels + ) super().__init__(prob_space) -class ContinuousTimeMarkovChainResult(ContinuousTimeFunction, - DiscreteValued): - - def __init__(self, states, rates, - unscaled_interarrival_times, - state_labels): +class ContinuousTimeMarkovChainResult(ContinuousTimeFunction, DiscreteValued): + def __init__(self, states, rates, unscaled_interarrival_times, state_labels): self.states = states self.rates = rates self.times = unscaled_interarrival_times @@ -124,6 +114,7 @@ def interarrival_times(n): state = self.states[i] interarrival_time = self.times[i] / self.rates[state] return interarrival_time + self.interarrival_times = InfiniteVector(interarrival_times) def _func(t): @@ -135,11 +126,11 @@ def _func(t): if total_time > t: return self.state_labels[state] n += 1 + super().__init__(_func) class ContinuousTimeMarkovChainProbabilitySpace(ProbabilitySpace): - def __init__(self, generator_matrix, initial_dist, state_labels=None): """Initialize a probability space for a continuous-time Markov chain. @@ -157,27 +148,34 @@ def __init__(self, generator_matrix, initial_dist, state_labels=None): for j, q in enumerate(row): if j == i: if q > 0: - raise Exception("Diagonal elements of a generator matrix " + - "cannot be positive.") + raise Exception( + "Diagonal elements of a generator matrix " + + "cannot be positive." + ) else: if q < 0: - raise Exception("Off-diagonal elements of a generator matrix " + - "cannot be negative.") + raise Exception( + "Off-diagonal elements of a generator matrix " + + "cannot be negative." + ) # Check that dimensions agree self.generator_matrix = np.array(generator_matrix) m, n = self.generator_matrix.shape if m != n: raise Exception("Transition matrix must be square.") if len(initial_dist) != n: - raise Exception("Initial distribution must be a vector whose " - "length matches the dimensions of the " - "transition matrix.") + raise Exception( + "Initial distribution must be a vector whose " + "length matches the dimensions of the " + "transition matrix." + ) self.initial_dist = initial_dist # Process state labels if state_labels is not None: if len(state_labels) != n: - raise Exception("There must be as many state labels as " - "there are states.") + raise Exception( + "There must be as many state labels as " "there are states." + ) self.state_labels = state_labels else: self.state_labels = range(n) @@ -195,21 +193,17 @@ def __init__(self, generator_matrix, initial_dist, state_labels=None): # A continuous-time Markov chain is specified by the # sequence of states and the unscaled interarrival times. def _draw(): - states = MarkovChain(self.transition_matrix, - self.initial_dist).draw() + states = MarkovChain(self.transition_matrix, self.initial_dist).draw() rates = -np.diag(self.generator_matrix) unscaled_interarrival_times = (Exponential(1) ** inf).draw() return ContinuousTimeMarkovChainResult( - states, - rates, - unscaled_interarrival_times, - self.state_labels) + states, rates, unscaled_interarrival_times, self.state_labels + ) super().__init__(_draw) class ContinuousTimeMarkovChain(RV): - def __init__(self, generator_matrix, initial_dist, state_labels=None): """Initialize a continuous-time Markov chain. @@ -221,7 +215,6 @@ def __init__(self, generator_matrix, initial_dist, state_labels=None): """ prob_space = ContinuousTimeMarkovChainProbabilitySpace( - generator_matrix, - initial_dist, - state_labels) + generator_matrix, initial_dist, state_labels + ) super().__init__(prob_space) diff --git a/symbulate/math.py b/symbulate/math.py index 8cf3587..0b53732 100644 --- a/symbulate/math.py +++ b/symbulate/math.py @@ -6,12 +6,7 @@ import scipy.stats as stats from .random_variables import RV -from .result import ( - Tuple, - TimeFunction, - ContinuousTimeFunction, - DiscreteValued -) +from .result import Tuple, TimeFunction, ContinuousTimeFunction, DiscreteValued from .results import Results pi = math.pi @@ -21,8 +16,8 @@ floor = math.floor ceil = math.ceil -def operation_factory(operation): +def operation_factory(operation): def _op_func(x): if isinstance(x, (RV, Tuple, TimeFunction)): # recursively call op_fun until x is a scalar @@ -34,6 +29,7 @@ def _op_func(x): return _op_func + sqrt = operation_factory(math.sqrt) exp = operation_factory(math.exp) sin = operation_factory(math.sin) @@ -41,42 +37,52 @@ def _op_func(x): tan = operation_factory(math.tan) factorial = operation_factory(math.factorial) + def log(value, base=e): return operation_factory(lambda x: math.log(x, base))(value) + def mean(x): if isinstance(x, numbers.Real): raise Exception("Taking the mean with one value is unnecessary.") else: return sum(x) / len(x) + def cumsum(x): return x.cumsum() + def var(x): return mean([(i - mean(x)) ** 2 for i in x]) + def sd(x): return math.sqrt(var(x)) + def median(x): if isinstance(x, numbers.Real): raise Exception("Taking the median of one value is unnecessary.") else: return np.median(x) + def min_max_diff(x): if isinstance(x, numbers.Real): raise Exception("Taking the range of one value is unnecessary.") else: return max(x) - min(x) + def med_abs_dev(x): - return median(list(abs(i-median(x)) for i in x)) + return median(list(abs(i - median(x)) for i in x)) + def quantile(q): return lambda x: np.percentile(x, q * 100) + def iqr(x): if isinstance(x, numbers.Real): raise Exception("Taking the iqr of one value is unnecessary.") @@ -84,30 +90,36 @@ def iqr(x): q75, q25 = np.percentile(x, [75, 25]) return q75 - q25 + def orderstatistics(n): if n <= 0: raise Exception("Out of bounds. Lowest order is 1.") else: return lambda x: np.partition(x, n - 1)[n - 1] + def skewness(x): if isinstance(x, numbers.Real): raise Exception("Finding the skenewss of one value is unnecessary,") else: return stats.skew(x) + def kurtosis(x): if isinstance(x, numbers.Real): raise Exception("Finding the kurtosis of one value is unnecessary.") else: return stats.kurtosis(x) + def moment(k): return lambda x: stats.moment(x, k) + def trimmed_mean(alpha): return lambda x: stats.trim_mean(x, alpha) + def comparefun(x, compare, value): count = 0 for i in x: @@ -115,6 +127,7 @@ def comparefun(x, compare, value): count += 1 return count + def count(func=lambda x: True): def _func(x): val = 0 @@ -122,38 +135,52 @@ def _func(x): if func(i): val += 1 return val + return _func + def count_eq(value): def func(x): return comparefun(x, op.eq, value) + return func + def count_neq(value): def func(x): return comparefun(x, op.ne, value) + return func + def count_lt(value): def func(x): return comparefun(x, op.lt, value) + return func + def count_gt(value): def func(x): return comparefun(x, op.gt, value) + return func + def count_geq(value): def func(x): return comparefun(x, op.ge, value) + return func + def count_leq(value): def func(x): return comparefun(x, op.le, value) + return func + def interarrival_times(continuous_time_function): """Given a realization of a continuous-time, discrete-state process, returns the interarrival @@ -164,16 +191,17 @@ def interarrival_times(continuous_time_function): object, such as ContinuousTimeMarkovChainResult or PoissonProcessResult. """ - if not (isinstance(continuous_time_function, - ContinuousTimeFunction) and - isinstance(continuous_time_function, - DiscreteValued)): + if not ( + isinstance(continuous_time_function, ContinuousTimeFunction) + and isinstance(continuous_time_function, DiscreteValued) + ): raise TypeError( "Interarrival times are only defined for " "continuous-time, discrete-valued functions." ) return continuous_time_function.get_interarrival_times() + def arrival_times(continuous_time_function): """Given a realization of a continuous-time, discrete-state process, returns the arrival @@ -184,16 +212,17 @@ def arrival_times(continuous_time_function): object, such as ContinuousTimeMarkovChainResult or PoissonProcessResult. """ - if not (isinstance(continuous_time_function, - ContinuousTimeFunction) and - isinstance(continuous_time_function, - DiscreteValued)): + if not ( + isinstance(continuous_time_function, ContinuousTimeFunction) + and isinstance(continuous_time_function, DiscreteValued) + ): raise TypeError( "Interarrival times are only defined for " "continuous-time, discrete-valued functions." ) return continuous_time_function.get_arrival_times() + def states(discrete_valued_function): """Given a realization of a discrete-valued function, returns an InfiniteVector of the sequence of @@ -205,8 +234,5 @@ def states(discrete_valued_function): PoissonProcessResult) """ if not isinstance(discrete_valued_function, DiscreteValued): - raise TypeError( - "States are only defined for discrete-valued " - "functions." - ) + raise TypeError("States are only defined for discrete-valued " "functions.") return discrete_valued_function.get_states() diff --git a/symbulate/plot.py b/symbulate/plot.py index 6d76953..c97766d 100644 --- a/symbulate/plot.py +++ b/symbulate/plot.py @@ -12,20 +12,23 @@ xlim = plt.xlim ylim = plt.ylim + def init_color(): - hex_list = [colors.rgb2hex(rgb) for rgb in plt.cm.get_cmap('tab10').colors] - plt.rcParams["axes.prop_cycle"] = cycler('color', hex_list) + hex_list = [colors.rgb2hex(rgb) for rgb in plt.cm.get_cmap("tab10").colors] + plt.rcParams["axes.prop_cycle"] = cycler("color", hex_list) + def get_next_color(axes): color_cycle = axes._get_lines.prop_cycler color = next(color_cycle)["color"] return color -def configure_axes(axes, xdata, ydata, xlabel = None, ylabel = None): + +def configure_axes(axes, xdata, ydata, xlabel=None, ylabel=None): # Create 5% buffer on either end of plot so that leftmost and rightmost # lines are visible. However, if current axes are already bigger, # keep current axes. - buff = .05 * (max(xdata) - min(xdata)) + buff = 0.05 * (max(xdata) - min(xdata)) xmin, xmax = axes.get_xlim() xmin = min(xmin, min(xdata) - buff) xmax = max(xmax, max(xdata) + buff) @@ -34,20 +37,23 @@ def configure_axes(axes, xdata, ydata, xlabel = None, ylabel = None): _, ymax = axes.get_ylim() ymax = max(ymax, 1.05 * max(ydata)) plt.ylim(0, ymax) - + if xlabel is not None: plt.xlabel(xlabel) if ylabel is not None: plt.ylabel(ylabel) + def plot(*args, **kwargs): try: args[0].plot(**kwargs) except: plt.plot(*args, **kwargs) - + + def is_discrete(heights): - return sum([(i > 1) for i in heights]) > .8 * len(heights) + return sum([(i > 1) for i in heights]) > 0.8 * len(heights) + def count_var(x): counts = {} @@ -57,41 +63,46 @@ def count_var(x): else: counts[val] = 1 return counts - + + def compute_density(values): density = gaussian_kde(values) density.covariance_factor = lambda: 0.25 density._compute_covariance() return density + def setup_ticks(pos, lab, ax): ax.set_ticks(pos) ax.set_ticklabels(lab) - + + def add_colorbar(fig, type, mappable, label): - #create axis for cbar to place on left - if 'marginal' not in type: + # create axis for cbar to place on left + if "marginal" not in type: caxes = fig.add_axes([0, 0.1, 0.05, 0.8]) - else: #adjust height if marginals + else: # adjust height if marginals caxes = fig.add_axes([0, 0.1, 0.05, 0.57]) cbar = plt.colorbar(mappable=mappable, cax=caxes) - caxes.yaxis.set_ticks_position('left') + caxes.yaxis.set_ticks_position("left") cbar.set_label(label) - caxes.yaxis.set_label_position('left') + caxes.yaxis.set_label_position("left") return caxes + def setup_tile(v, bins, discrete): if not discrete: v_lab = np.linspace(min(v), max(v), bins + 1) v_pos = np.arange(0, len(v_lab)) - 0.5 v_vect = np.digitize(v, v_lab, right=True) - 1 else: - v_lab = np.unique(v) #returns sorted array + v_lab = np.unique(v) # returns sorted array v_pos = range(len(v_lab)) v_map = dict(zip(v_lab, v_pos)) v_vect = np.vectorize(v_map.get)(v) return v_vect, v_lab, v_pos + def make_tile(x, y, bins, discrete_x, discrete_y, ax): x_vect, x_lab, x_pos = setup_tile(x, bins, discrete_x) y_vect, y_lab, y_pos = setup_tile(y, bins, discrete_y) @@ -100,51 +111,63 @@ def make_tile(x, y, bins, discrete_x, discrete_y, ax): y_shape = len(y_lab) if discrete_y else len(y_lab) - 1 x_shape = len(x_lab) if discrete_x else len(x_lab) - 1 intensity = np.zeros(shape=(y_shape, x_shape)) - + for key, val in counts.items(): intensity[key] = val / nums - if not discrete_x: x_lab = np.around(x_lab, decimals=1) - if not discrete_y: y_lab = np.around(y_lab, decimals=1) - hm = ax.matshow(intensity, cmap='Blues', origin='lower', aspect='auto', vmin=0) - ax.xaxis.set_ticks_position('bottom') + if not discrete_x: + x_lab = np.around(x_lab, decimals=1) + if not discrete_y: + y_lab = np.around(y_lab, decimals=1) + hm = ax.matshow(intensity, cmap="Blues", origin="lower", aspect="auto", vmin=0) + ax.xaxis.set_ticks_position("bottom") setup_ticks(x_pos, x_lab, ax.xaxis) setup_ticks(y_pos, y_lab, ax.yaxis) return hm + def make_violin(data, positions, ax, axis, alpha): values = [] - i, j = (0, 1) if axis == 'x' else (1, 0) + i, j = (0, 1) if axis == "x" else (1, 0) values = [data[data[:, i] == pos, j].tolist() for pos in positions] - violins = ax.violinplot(dataset=values, showmedians=True, - vert=False if axis == 'y' else True) - setup_ticks(np.array(positions) + 1, positions, - ax.xaxis if axis == 'x' else ax.yaxis) - for part in violins['bodies']: - part.set_edgecolor('black') + violins = ax.violinplot( + dataset=values, showmedians=True, vert=False if axis == "y" else True + ) + setup_ticks( + np.array(positions) + 1, positions, ax.xaxis if axis == "x" else ax.yaxis + ) + for part in violins["bodies"]: + part.set_edgecolor("black") part.set_alpha(alpha) - for component in ('cbars', 'cmins', 'cmaxes', 'cmedians'): + for component in ("cbars", "cmins", "cmaxes", "cmedians"): vp = violins[component] - vp.set_edgecolor('black') + vp.set_edgecolor("black") vp.set_linewidth(1) + def make_marginal_impulse(count, color, ax_marg, alpha, axis): key, val = list(count.keys()), list(count.values()) tot = sum(val) val = [i / tot for i in val] - if axis == 'x': + if axis == "x": ax_marg.vlines(key, 0, val, color=color, alpha=alpha) - elif axis == 'y': + elif axis == "y": ax_marg.hlines(key, 0, val, color=color, alpha=alpha) + def make_density2D(x, y, ax): res = np.vstack([x, y]) density = gaussian_kde(res) xmax, xmin = max(x), min(x) ymax, ymin = max(y), min(y) - Xgrid, Ygrid = np.meshgrid(np.linspace(xmin, xmax, 100), - np.linspace(ymin, ymax, 100)) + Xgrid, Ygrid = np.meshgrid( + np.linspace(xmin, xmax, 100), np.linspace(ymin, ymax, 100) + ) Z = density.evaluate(np.vstack([Xgrid.ravel(), Ygrid.ravel()])) - den = ax.imshow(Z.reshape(Xgrid.shape), origin='lower', cmap='Blues', - aspect='auto', extent=[xmin, xmax, ymin, ymax] + den = ax.imshow( + Z.reshape(Xgrid.shape), + origin="lower", + cmap="Blues", + aspect="auto", + extent=[xmin, xmax, ymin, ymax], ) return den diff --git a/symbulate/poisson_process.py b/symbulate/poisson_process.py index 0aeb998..6719c0a 100644 --- a/symbulate/poisson_process.py +++ b/symbulate/poisson_process.py @@ -1,18 +1,12 @@ from .distributions import Exponential from .math import inf from .probability_space import ProbabilitySpace -from .result import ( - InfiniteVector, - ContinuousTimeFunction, - DiscreteValued -) +from .result import InfiniteVector, ContinuousTimeFunction, DiscreteValued from .random_variables import RV from .random_processes import RandomProcess -class PoissonProcessResult(ContinuousTimeFunction, - DiscreteValued): - +class PoissonProcessResult(ContinuousTimeFunction, DiscreteValued): def __init__(self, interarrival_times): self.interarrival_times = interarrival_times @@ -30,7 +24,6 @@ def get_states(self): class PoissonProcessProbabilitySpace(ProbabilitySpace): - def __init__(self, rate): """Initialize probability space for a Poisson process. @@ -47,7 +40,6 @@ def draw(): class PoissonProcess(RandomProcess, RV): - def __init__(self, rate): """Initialize a Poisson process. diff --git a/symbulate/probability_space.py b/symbulate/probability_space.py index c03617c..de30703 100644 --- a/symbulate/probability_space.py +++ b/symbulate/probability_space.py @@ -1,4 +1,8 @@ import numpy as np +from typing import List +from typing import Optional + +from rich.progress import track from .base import Logical from .result import Vector, InfiniteVector, join @@ -16,7 +20,7 @@ class ProbabilitySpace: def __init__(self, draw): self.draw = draw - def sim(self, n): + def sim(self, n: int, show_progress: Optional[bool] = True): """Simulate n draws from probability space. Args: @@ -25,7 +29,12 @@ def sim(self, n): Returns: Results: A list-like object containing the simulation results. """ - return Results(self.draw() for _ in range(n)) + f = ( + lambda num_draws: track(range(n), "Simulating...") + if show_progress + else range(num_draws) + ) + return Results(self.draw() for _ in f(n)) def check_same(self, other): if self != other: @@ -41,29 +50,36 @@ def apply(self, func): A new ProbabilitySpace where each realization is func applied to a realization from the current probability space. """ + def draw(): return func(self.draw()) + return ProbabilitySpace(draw) def __mul__(self, other): def draw(): return join(self.draw(), other.draw()) + return ProbabilitySpace(draw) def __pow__(self, exponent): if exponent == float("inf"): + def draw(): def _func(_): return self.draw() + return InfiniteVector(_func) + else: + def draw(): return Vector(self.draw() for _ in range(exponent)) + return ProbabilitySpace(draw) class Event(Logical): - def __init__(self, prob_space, func): self.prob_space = prob_space self.func = func @@ -74,12 +90,10 @@ def check_same_prob_space(self, other): # The Logical superclass will use this to define the three # logical operations: and (&), or (|), not (~). def _logical_factory(self, op): - def _op_func(self, other=None): # other will be None when op is the "not" operator if other is None: - return Event(self.prob_space, - lambda outcome: op(self.func(outcome))) + return Event(self.prob_space, lambda outcome: op(self.func(outcome))) else: if isinstance(other, Event): self.check_same_prob_space(other) @@ -87,10 +101,12 @@ def _op_func(self, other=None): raise TypeError( "Logical operations are only defined " "between two Events, not between an Event " - "and a %s." % type(other).__name__) - return Event(self.prob_space, - lambda outcome: op(self.func(outcome), - other.func(outcome))) + "and a %s." % type(other).__name__ + ) + return Event( + self.prob_space, + lambda outcome: op(self.func(outcome), other.func(outcome)), + ) return _op_func @@ -98,10 +114,12 @@ def _op_func(self, other=None): # which evaluate to ((2 < X) and (X < 5)). This unfortunately # is not well-defined in Python and cannot be overloaded. def __bool__(self): - raise Exception("Cannot cast an Event to a boolean. " - "You may be getting this error if you " - "wrote an expression like (2 < X < 5). " - "Try ((2 < X) & (X < 5)) instead.") + raise Exception( + "Cannot cast an Event to a boolean. " + "You may be getting this error if you " + "wrote an expression like (2 < X < 5). " + "Try ((2 < X) & (X < 5)) instead." + ) def draw(self): return self.func(self.prob_space.draw()) @@ -139,9 +157,7 @@ def __init__(self, box, size=None, replace=True, probs=None, order_matters=True) self.box.extend([ticket] * count) self.probs = None else: - raise Exception( - "Box must be specified either as a list or a dict." - ) + raise Exception("Box must be specified either as a list or a dict.") self.size = None if size == 1 else size self.replace = replace self.order_matters = order_matters @@ -150,7 +166,7 @@ def __init__(self, box, size=None, replace=True, probs=None, order_matters=True) # If drawing without replacement, check that the number # of draws does not exceed the number of tickets in the box. - if not self.replace and self.size > len(self.box): + if not self.replace and self.size and self.size > len(self.box): raise Exception( "Cannot draw more tickets (without replacement) " "than there are tickets in the box." @@ -176,8 +192,10 @@ def draw_inds(size): if self.size is None: return self.box[draw_inds(None)] elif self.size == float("inf"): + def _func(_): return self.box[draw_inds(None)] + return self.infinite_output_type(_func) else: draws = [self.box[i] for i in draw_inds(self.size)] @@ -201,5 +219,41 @@ def __init__(self, size=None, replace=False, order_matters=True): for rank in list(range(2, 11)) + ["J", "Q", "K", "A"]: for suit in ["Diamonds", "Hearts", "Clubs", "Spades"]: box.append((rank, suit)) - super().__init__(box, size, replace, - probs=None, order_matters=order_matters) + super().__init__(box, size, replace, probs=None, order_matters=order_matters) + + +class Dice(BoxModel): + def __init__( + self, + sides: Optional[List[int]] = [1, 2, 3, 4, 5, 6], + probs: Optional[List[float]] = [1 / 6] * 6, + size: Optional[int] = None, + order_matters: Optional[bool] = True, + replace: Optional[bool] = False, + ): + BoxModel.__init__( + self, + box=sides, + probs=probs, + size=size, + replace=replace, + order_matters=order_matters, + ) + + +class Coin(BoxModel): + def __init__( + self, + probs: Optional[List[float]] = [0.5, 0.5], + size: Optional[int] = None, + order_matters: Optional[bool] = True, + replace: Optional[bool] = False, + ): + BoxModel.__init__( + self, + ["T", "H"], + probs=probs, + size=size, + replace=replace, + order_matters=order_matters, + ) diff --git a/symbulate/random_processes.py b/symbulate/random_processes.py index fbf2a3f..c652bba 100644 --- a/symbulate/random_processes.py +++ b/symbulate/random_processes.py @@ -26,8 +26,9 @@ class RandomProcess(RV): the value of the process is simply x(t).) """ - def __init__(self, prob_space, index_set=Naturals(), - func=lambda outcome, t: outcome[t]): + def __init__( + self, prob_space, index_set=Naturals(), func=lambda outcome, t: outcome[t] + ): self.index_set = index_set # This dict stores random variables at specific times. self.rvs = {} @@ -39,6 +40,7 @@ def x(t): if t in self.rvs: return self.rvs[t].func(outcome) return func(outcome, t) + return TimeFunction.from_index_set(self.index_set, x) super().__init__(prob_space, _func) @@ -46,8 +48,7 @@ def x(t): def __setitem__(self, t, value): if t not in self.index_set: raise KeyError( - "Time %s is not in the index set for this " - "random process." % str(t) + "Time %s is not in the index set for this " "random process." % str(t) ) # If value is a RV, store it in self.rvs. if isinstance(value, RV): diff --git a/symbulate/random_variables.py b/symbulate/random_variables.py index 38e9e11..fc34f5d 100644 --- a/symbulate/random_variables.py +++ b/symbulate/random_variables.py @@ -1,8 +1,12 @@ +from typing import Optional +from rich.progress import track + from .base import Arithmetic, Transformable, Comparable -from .probability_space import Event -from .result import Vector, join, is_scalar, is_numeric_vector +from .probability_space import Event, Coin +from .result import Vector, join, is_scalar, is_numeric_vector, is_vector from .results import RVResults + class RV(Arithmetic, Transformable, Comparable): """Defines a random variable. @@ -58,7 +62,7 @@ def draw(self): """ return self.func(self.prob_space.draw()) - def sim(self, n): + def sim(self, n: int, show_progress: Optional[bool] = True): """Simulate n draws from probability space described by the random variable. @@ -68,14 +72,20 @@ def sim(self, n): Returns: RVResults: A list-like object containing the simulation results. """ - - return RVResults(self.draw() for _ in range(n)) + f = ( + lambda num_draws: track(range(n), "Simulating...") + if show_progress + else range(num_draws) + ) + return RVResults(self.draw() for _ in f(n)) def __call__(self, outcome): - print("Warning: Calling an RV as a function simply applies the " - "function that defines the RV to the input, regardless of " - "whether that input is a possible outcome in the underlying " - "probability space.") + print( + "Warning: Calling an RV as a function simply applies the " + "function that defines the RV to the input, regardless of " + "whether that input is a possible outcome in the underlying " + "probability space." + ) return self.func(outcome) def check_same_prob_space(self, other): @@ -103,8 +113,10 @@ def g(x): return log(x ** 2) Y = X.apply(g) """ + def _func(outcome): return func(self.func(outcome)) + return RV(self.prob_space, _func) # This allows us to unpack a random vector, @@ -123,32 +135,29 @@ def __iter__(self): def __getitem__(self, n): # if n is an RV, return a new random variable if isinstance(n, RV): - return RV(self.prob_space, - lambda x: self.func(x)[n.func(x)]) + return RV(self.prob_space, lambda x: self.func(x)[n.func(x)]) # if the indices are a list, return a random vector elif is_numeric_vector(n): - return self.apply( - lambda x: Vector(x[i] for i in n) - ) + return self.apply(lambda x: Vector(x[i] for i in n)) # if the indices are a slice, return a random vector elif isinstance(n, slice): return self.apply( - lambda x: Vector(x[i] for i in - range(n.start, n.stop, n.step or 1)) - ) + lambda x: Vector(x[i] for i in range(n.start, n.stop, n.step or 1)) + ) # otherwise, return the nth value return self.apply(lambda x: x[n]) # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.) def _operation_factory(self, op): - def _op_func(self, other): # operations between this RV and another RV if isinstance(other, RV): self.check_same_prob_space(other) + def _func(outcome): return op(self.func(outcome), other.func(outcome)) + return RV(self.prob_space, _func) # operations between this RV and a scalar return self.apply(lambda x: op(x, other)) @@ -159,15 +168,12 @@ def _func(outcome): # usual comparison operations (e.g., <, >, ==, !=, etc.). # Note that a comparison of a random variable returns an Event. def _comparison_factory(self, op): - def _op_func(self, other): if is_scalar(other): - return Event(self.prob_space, - lambda x: op(self.func(x), other)) + return Event(self.prob_space, lambda x: op(self.func(x), other)) elif isinstance(other, RV): self.check_same_prob_space(other) - return Event(self.prob_space, - lambda x: op(self.func(x), other.func(x))) + return Event(self.prob_space, lambda x: op(self.func(x), other.func(x))) raise NotImplementedError( "Comparisons are only defined between two RVs or " "between an RV and a scalar." @@ -175,16 +181,19 @@ def _op_func(self, other): return _op_func - # Define a joint distribution of two random variables: e.g., X & Y def __and__(self, other): self.check_same_prob_space(other) if isinstance(other, RV): + def _func(outcome): return join(self.func(outcome), other.func(outcome)) + elif is_scalar(other): + def _func(outcome): return join(self.func(outcome), other) + else: raise Exception("Joint distributions are only defined for RVs.") return RV(self.prob_space, _func) @@ -192,8 +201,10 @@ def _func(outcome): def __rand__(self, other): self.check_same_prob_space(other) if is_scalar(other): + def _func(outcome): return join(other, self.func(outcome)) + return RV(self.prob_space, _func) # Define conditional distribution of random variable. @@ -231,8 +242,7 @@ class RVConditional(RV): def __init__(self, random_variable, condition_event): self.condition_event = condition_event - super().__init__(random_variable.prob_space, - random_variable.func) + super().__init__(random_variable.prob_space, random_variable.func) def draw(self): """A function that takes no arguments and returns a value from @@ -246,3 +256,16 @@ def draw(self): outcome = self.prob_space.draw() if self.condition_event.func(outcome): return self.func(outcome) + + +def _coin_rv_map(x): + if not isinstance(x, str) and is_vector(x): + return tuple(map(lambda v: 1 if v == "H" else 0, x)) + return 1 if x == "H" else 0 + + +class CoinRV(RV): + def __init__(self, prob_space: Coin): + if not isinstance(prob_space, Coin): + raise ValueError("prob_sapce must of type Coin or Coin's subtypes") + RV.__init__(self, prob_space, func=_coin_rv_map) diff --git a/symbulate/result.py b/symbulate/result.py index b913103..4d56029 100644 --- a/symbulate/result.py +++ b/symbulate/result.py @@ -8,7 +8,6 @@ class Scalar(numbers.Number): - def __new__(cls, value, *args, **kwargs): if isinstance(value, numbers.Integral): return Int(value) @@ -19,31 +18,25 @@ def __new__(cls, value, *args, **kwargs): class Int(int, Scalar): - def __new__(cls, value, *args, **kwargs): return super(Int, cls).__new__(cls, value) class Float(float, Scalar): - def __new__(cls, value, *args, **kwargs): return super(Float, cls).__new__(cls, value) class Tuple(Arithmetic, Transformable, Statistical, Filterable): - """A collapsible data structure. - """ + """A collapsible data structure.""" def __init__(self, values): if is_scalar(values): - self.values = (values, ) + self.values = (values,) elif hasattr(values, "__len__") or hasattr(values, "__next__"): self.values = tuple(values) else: - raise Exception( - "Tuples can only be created from " - "finite iterable data." - ) + raise Exception("Tuples can only be created from " "finite iterable data.") def __getitem__(self, n): # if n is a numeric array, return a Tuple of those values @@ -114,7 +107,6 @@ def filter(self, filt): # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). def _operation_factory(self, op): - def _op_func(self, other): if is_number(other): return type(self)(op(value, other) for value in self) @@ -125,14 +117,16 @@ def _op_func(self, other): "Arithmetic operations between a %s and a %s " "are only valid if they are the same length. " "You attempted to combine a %s of length %d " - "with a %s of length %d." % ( + "with a %s of length %d." + % ( type(self).__name__, type(other).__name__, type(self).__name__, len(self), type(other).__name__, - len(other) - )) + len(other), + ) + ) # return a new Tuple/Vector of the same length return type(self)(op(a, b) for a, b in zip(self, other)) else: @@ -145,13 +139,14 @@ def _op_func(self, other): def _statistic_factory(self, op): def _op_func(self): return op(self.values) + return _op_func def cumsum(self): return type(self)(np.cumsum(self.values)) def plot(self, **kwargs): - plt.plot(range(len(self)), self.values, '.--', **kwargs) + plt.plot(range(len(self)), self.values, ".--", **kwargs) def __str__(self): if len(self) <= 6: @@ -166,13 +161,12 @@ def __repr__(self): class Vector(Tuple): - """A data structure like a Tuple, except it does not collapse. - """ + """A data structure like a Tuple, except it does not collapse.""" + pass class TimeFunction(Arithmetic): - @classmethod def from_index_set(cls, index_set, func=None): if isinstance(index_set, DiscreteTimeSequence): @@ -192,13 +186,13 @@ def check_same_index_set(self, other): "TimeFunctions with the same index set." ) else: - raise Exception("Cannot combine %s with %s." % ( - type(self).__name__, type(other).__name__ - )) + raise Exception( + "Cannot combine %s with %s." + % (type(self).__name__, type(other).__name__) + ) class InfiniteTuple(TimeFunction): - def __init__(self, func=lambda n: n): """Initializes a (lazy) data structure for an infinite vector. @@ -270,7 +264,6 @@ def log_squared(n): # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). def _operation_factory(self, op): - def _op_func(self, other): self.check_same_index_set(other) if is_number(other): @@ -284,20 +277,19 @@ def _op_func(self, other): class InfiniteVector(InfiniteTuple): - def cumsum(self): def _func(n): return sum(self[i] for i in range(n + 1)) + return InfiniteVector(_func) def plot(self, tmin=0, tmax=10, **kwargs): xs = range(tmin, tmax) ys = [self[t] for t in range(tmin, tmax)] - plt.plot(xs, ys, '.--', **kwargs) + plt.plot(xs, ys, ".--", **kwargs) class DiscreteTimeFunction(TimeFunction): - def __init__(self, func=None, fs=1, index_set=None): """Initializes a data structure for a discrete-time function. @@ -318,15 +310,16 @@ def __init__(self, func=None, fs=1, index_set=None): self.index_set = DiscreteTimeSequence(fs) else: self.index_set = index_set - self.array_pos = [] # stores values for t >= 0 - self.array_neg = [] # stores values for t < 0 + self.array_pos = [] # stores values for t >= 0 + self.array_neg = [] # stores values for t < 0 def _get_value_at_index(self, n): if not isinstance(n, numbers.Integral): raise KeyError( "For a DiscreteTimeFunction f, f[n] returns the " "the nth time sample, so n must be an integer. " - "If you want the value at time t, try f(t) instead.") + "If you want the value at time t, try f(t) instead." + ) if n >= 0: m = len(self.array_pos) @@ -344,9 +337,13 @@ def _get_value_at_index(self, n): def _get_value_at_time(self, t): fs = self.index_set.fs if not t in self.index_set: - raise KeyError(( - "No value at time %.2f for a function with " - "a sampling rate of %d Hz.") % (t, fs)) + raise KeyError( + ( + "No value at time %.2f for a function with " + "a sampling rate of %d Hz." + ) + % (t, fs) + ) return self._get_value_at_index(int(t * fs)) def __getitem__(self, n): @@ -355,11 +352,14 @@ def __getitem__(self, n): elif is_numeric_vector(n): return Vector(self._get_value_at_index(e) for e in n) elif isinstance(n, slice): - return Vector(self._get_value_at_index(e) for e in - range(n.start, n.stop, n.step or 1)) + return Vector( + self._get_value_at_index(e) for e in range(n.start, n.stop, n.step or 1) + ) else: - raise TypeError("Cannot evaluate DiscreteTimeFunction at " - "index %s (type %s)." % (n, type(n).__name__)) + raise TypeError( + "Cannot evaluate DiscreteTimeFunction at " + "index %s (type %s)." % (n, type(n).__name__) + ) def __call__(self, t): if is_number(t): @@ -368,11 +368,14 @@ def __call__(self, t): return Vector(self._get_value_at_time(e) for e in t) elif isinstance(t, DiscreteTimeFunction): self.check_same_index_set(t) - return DiscreteTimeFunction(func=lambda n: self(t[n]), - index_set=self.index_set) + return DiscreteTimeFunction( + func=lambda n: self(t[n]), index_set=self.index_set + ) else: - raise TypeError("Cannot evaluate DiscreteTimeFunction at " - "time %s (type %s)." % (t, type(t).__name__)) + raise TypeError( + "Cannot evaluate DiscreteTimeFunction at " + "time %s (type %s)." % (t, type(t).__name__) + ) def apply(self, func): """Compose function with the TimeFunction. @@ -395,24 +398,20 @@ def log_squared(f): return log(f) ** 2 g = f.apply(log_squared) """ - return DiscreteTimeFunction(lambda n: func(self[n]), - index_set=self.index_set) + return DiscreteTimeFunction(lambda n: func(self[n]), index_set=self.index_set) # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). def _operation_factory(self, op): - def _op_func(self, other): self.check_same_index_set(other) if is_number(other): return DiscreteTimeFunction( - lambda n: op(self[n], other), - index_set=self.index_set + lambda n: op(self[n], other), index_set=self.index_set ) elif isinstance(other, DiscreteTimeFunction): return DiscreteTimeFunction( - lambda n: op(self[n], other[n]), - index_set=self.index_set + lambda n: op(self[n], other[n]), index_set=self.index_set ) else: return NotImplemented @@ -435,7 +434,6 @@ def plot(self, tmin=0, tmax=10, **kwargs): class ContinuousTimeFunction(TimeFunction): - def __init__(self, func=lambda t: t): """Initializes a data structure for a discrete-time function. @@ -461,8 +459,10 @@ def __call__(self, t): elif isinstance(t, ContinuousTimeFunction): return ContinuousTimeFunction(func=lambda s: self(t(s))) else: - raise TypeError("Cannot evaluate ContinuousTimeFunction at " - "time %s (type %s)." % (t, type(t).__name__)) + raise TypeError( + "Cannot evaluate ContinuousTimeFunction at " + "time %s (type %s)." % (t, type(t).__name__) + ) def __getitem__(self, t): return self(t) @@ -494,17 +494,12 @@ def log_squared(f): # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). def _operation_factory(self, op): - def _op_func(self, other): self.check_same_index_set(other) if is_number(other): - return ContinuousTimeFunction( - lambda t: op(self(t), other) - ) + return ContinuousTimeFunction(lambda t: op(self(t), other)) elif isinstance(other, ContinuousTimeFunction): - return ContinuousTimeFunction( - lambda t: op(self(t), other(t)) - ) + return ContinuousTimeFunction(lambda t: op(self(t), other(t))) else: return NotImplemented @@ -523,23 +518,19 @@ def plot(self, tmin=0, tmax=10, **kwargs): class DiscreteValued: - def get_states(self): if not hasattr(self, "states"): - raise NameError("States not defined for " - "function.") + raise NameError("States not defined for " "function.") return self.states def get_interarrival_times(self): if not hasattr(self, "interarrival_times"): - raise NameError("Interarrival times not " - "defined for function.") + raise NameError("Interarrival times not " "defined for function.") return self.interarrival_times def get_arrival_times(self): if not hasattr(self, "interarrival_times"): - raise NameError("Interarrival times not " - "defined for function.") + raise NameError("Interarrival times not " "defined for function.") return self.interarrival_times.cumsum() @@ -551,8 +542,8 @@ def join(result1, result2): result2: The second result. """ - a = tuple(result1.values) if type(result1) == Tuple else (result1, ) - b = tuple(result2.values) if type(result2) == Tuple else (result2, ) + a = tuple(result1.values) if type(result1) == Tuple else (result1,) + b = tuple(result2.values) if type(result2) == Tuple else (result2,) return Tuple(a + b) @@ -583,13 +574,15 @@ def _func(n): return values[n] else: return arg[n - len(values)] + return type(arg)(_func) - raise Exception("InfiniteTuple must be the last " - "argument to concat().") + raise Exception("InfiniteTuple must be the last " "argument to concat().") else: - raise TypeError("Every argument to concat() must be either " - "a scalar, a vector, or an InfiniteTuple.") + raise TypeError( + "Every argument to concat() must be either " + "a scalar, a vector, or an InfiniteTuple." + ) return Vector(values) diff --git a/symbulate/results.py b/symbulate/results.py index 0f72d2c..01e600c 100644 --- a/symbulate/results.py +++ b/symbulate/results.py @@ -14,30 +14,46 @@ from matplotlib.ticker import NullFormatter from matplotlib.transforms import Affine2D -from .base import (Arithmetic, Statistical, Comparable, - Logical, Filterable, Transformable) -from .plot import (configure_axes, init_color, get_next_color, is_discrete, - count_var, compute_density, add_colorbar, - setup_ticks, make_tile, make_violin, - make_marginal_impulse, make_density2D) -from .result import (Scalar, Vector, TimeFunction, - is_number, is_numeric_vector) +from rich.table import Table as RichTable + +from .base import ( + Arithmetic, + Statistical, + Comparable, + Logical, + Filterable, + Transformable, +) +from .plot import ( + configure_axes, + init_color, + get_next_color, + is_discrete, + count_var, + compute_density, + add_colorbar, + setup_ticks, + make_tile, + make_violin, + make_marginal_impulse, + make_density2D, +) +from .result import Scalar, Vector, TimeFunction, is_number, is_numeric_vector from .table import Table -plt.style.use('seaborn-colorblind') +plt.style.use("seaborn-colorblind") def _is_hashable(obj): return hasattr(obj, "__hash__") + def _is_boolean_vector(vector): return all(isinstance(x, (bool, np.bool_)) for x in vector) -class Results(Arithmetic, Statistical, Comparable, - Logical, Filterable, Transformable): - +class Results(Arithmetic, Statistical, Comparable, Logical, Filterable, Transformable): def __init__(self, results, sim_id=None): self.results = list(results) self.sim_id = time.time() if sim_id is None else sim_id @@ -54,10 +70,7 @@ def apply(self, func): the function to each outcome from the original Results object. """ - return type(self)( - [func(result) for result in self.results], - self.sim_id - ) + return type(self)([func(result) for result in self.results], self.sim_id) def __getitem__(self, n): # if n is a Results object, use it as a boolean mask @@ -66,9 +79,7 @@ def __getitem__(self, n): # if n is a numeric array of values, return a Results # object with those dimensions elif is_numeric_vector(n): - return self.apply( - lambda result: type(result)(result[i] for i in n) - ) + return self.apply(lambda result: type(result)(result[i] for i in n)) # otherwise, return the nth value of every simulation return self.apply(lambda result: result[n]) @@ -99,9 +110,7 @@ def get(self, n): # if n is a numeric array, return a Results object with those results if is_numeric_vector(n): - return type(self)( - self.results[i] for i in n - ) + return type(self)(self.results[i] for i in n) # otherwise, return the nth result (this also works when n is a slice) return self.results[n] @@ -163,13 +172,10 @@ def filter(self, filt): ) if len(filt) != len(self): raise ValueError( - "Filter must be the same length as the " - "Results object." + "Filter must be the same length as the " "Results object." ) if not _is_boolean_vector(filt): - raise ValueError( - "Every element in the filter must be a boolean." - ) + raise ValueError("Every element in the filter must be a boolean.") return type(self)(x for x, cond in zip(self, filt) if cond) elif callable(filt): return type(self)(x for x in self if filt(x)) @@ -182,23 +188,15 @@ def filter(self, filt): # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). def _operation_factory(self, op): - def _op_func(self, other): if isinstance(other, Results): if len(self) != len(other): - raise Exception( - "Results objects must be of the " - "same length." - ) + raise Exception("Results objects must be of the " "same length.") if self.sim_id != other.sim_id: raise Exception( - "Results objects must come from the " - "same simulation." + "Results objects must come from the " "same simulation." ) - return type(self)( - [op(x, y) for x, y in zip(self, other)], - self.sim_id - ) + return type(self)([op(x, y) for x, y in zip(self, other)], self.sim_id) else: return self.apply(lambda x: op(x, other)) @@ -212,10 +210,12 @@ def _comparison_factory(self, op): # The Statistical superclass will use this to define all of the # usual comparison operations (e.g., <, >, ==, !=, etc.). def _statistic_factory(self, _): - raise Exception("Statistical functions are only available " - "for simulations of random variables. " - "Define a RV on this probability space " - "and then try again.") + raise Exception( + "Statistical functions are only available " + "for simulations of random variables. " + "Define a RV on this probability space " + "and then try again." + ) def _multivariate_statistic_factory(self, op): self._statistic_factory(op) @@ -223,56 +223,72 @@ def _multivariate_statistic_factory(self, op): # The Logical superclass will use this to define the three # logical operations: and (&), or (|), not (~). def _logical_factory(self, op): - def _op_func(self, other=None): # check that the vector only contains booleans if not _is_boolean_vector(self): raise ValueError( "Logical operations are only defined for " - "boolean (True/False) Results objects.") + "boolean (True/False) Results objects." + ) # other will be None when op is the "not" operator if other is None: return Results([op(x) for x in self], self.sim_id) else: if isinstance(other, Results): if self.sim_id != other.sim_id: - raise Exception("Results objects must come " - "from the same simulation.") + raise Exception( + "Results objects must come " "from the same simulation." + ) if not _is_boolean_vector(other): raise ValueError( "Logical operations are only defined for " - "boolean (True/False) Results objects.") + "boolean (True/False) Results objects." + ) else: raise TypeError( "Logical operations are only defined " "between two Results, not between a Result " - "and a %s." % type(other).__name__) - return Results( - [op(x, y) for x, y in zip(self, other)], - self.sim_id - ) + "and a %s." % type(other).__name__ + ) + return Results([op(x, y) for x, y in zip(self, other)], self.sim_id) return _op_func - def plot(self): - raise Exception("Only simulations of random variables (RV) " - "can be plotted, but you simulated from a " - "probability space. You must first define a RV " - "on your probability space and simulate it. " - "Then call .plot() on those simulations.") - + raise Exception( + "Only simulations of random variables (RV) " + "can be plotted, but you simulated from a " + "probability space. You must first define a RV " + "on your probability space and simulate it. " + "Then call .plot() on those simulations." + ) + + def __rich__(self) -> str: + rich_table = RichTable("Index", "Result") + results_len = len(self.results) + for i, result in enumerate(self.results): + if results_len >= 12 and i == 9: + rich_table.add_row("..", ".") + break + else: + rich_table.add_row(str(i), str(result)) + + if results_len >= 12: + rich_table.add_row(str(results_len - 1), str(self.results[-1])) + + return rich_table + def __repr__(self): i_last = len(self) - 1 max_index_length = len(str(i_last)) if max_index_length <= 5: - index_header_space = '' - index_value_space = ' ' * 4 + index_header_space = "" + index_value_space = " " * 4 else: - index_header_space = ' ' * (max_index_length - 5) - index_value_space = ' ' * (max_index_length - 1) + index_header_space = " " * (max_index_length - 5) + index_value_space = " " * (max_index_length - 1) table_rows = [] @@ -282,62 +298,28 @@ def __repr__(self): table_rows.append(f"{str(i)}{index_value_space} {str(result)}") if len(self) > 9 and i >= 8: - index_value_space = ' ' * (5 - len(str(i_last))) + index_value_space = " " * (5 - len(str(i_last))) if len(self) > 11: table_rows.append( f"{'.' * max_index_length}{index_value_space} " - f"{'.' * len(str(self.get(i_last)))}") + f"{'.' * len(str(self.get(i_last)))}" + ) elif len(self) == 11: table_rows.append( f"{str(i_last - 1)}{' ' * (5 - len(str(i_last - 1)))} " - f"{str(self.get(i_last - 1))}") + f"{str(self.get(i_last - 1))}" + ) table_rows.append( f"{str(i_last)}{index_value_space} {str(self.get(i_last))}" ) break - return '\n'.join(table_rows) - - def _repr_html_(self): - - table_template = ''' - - - - - - - {table_body} - -
IndexResult
- ''' - row_template = ''' - - %s%s - - ''' - - def _truncate(result): - if len(result) > 100: - return result[:100] + "..." - return result - - table_body = "" - for i, result in enumerate(self.results): - table_body += row_template % (i, _truncate(str(result))) - # if we've already printed 9 rows, skip to end - if i >= 8: - table_body += "......" - i_last = len(self) - 1 - table_body += row_template % (i_last, _truncate(str(self.get(i_last)))) - break - return table_template.format(table_body=table_body) + return "\n".join(table_rows) class RVResults(Results): - def __init__(self, results, sim_id=None): super().__init__(results, sim_id) init_color() @@ -361,11 +343,11 @@ def __init__(self, results, sim_id=None): self.dim = None # iterate over remaining results, ensure they are consistent with the first for result in iterresults: - if (isinstance(result, TimeFunction) and - result.index_set != self.index_set): + if isinstance(result, TimeFunction) and result.index_set != self.index_set: self.index_set = None - if ((is_number(result) and self.dim != 1) or - (is_numeric_vector(result) and self.dim != len(result))): + if (is_number(result) and self.dim != 1) or ( + is_numeric_vector(result) and self.dim != len(result) + ): self.dim = None def _set_array(self): @@ -381,12 +363,12 @@ def _set_array(self): else: raise Exception( "This operation is only possible with results " - "of consistent dimension.") + "of consistent dimension." + ) # The Statistical superclass will use this to define all of the # usual comparison operations (e.g., <, >, ==, !=, etc.). def _statistic_factory(self, op): - def _op_func(self): self._set_array() if self.dim == 1: @@ -394,8 +376,10 @@ def _op_func(self): elif self.dim is not None: return Vector(op(a=self.array, axis=0)) elif self.index_set is not None: + def _func(t): return _op_func(self[t]) + return TimeFunction.from_index_set(self.index_set, _func) raise NotImplementedError( "Statistics can only be calculated for numerical " @@ -405,7 +389,6 @@ def _func(t): return _op_func def _multivariate_statistic_factory(self, op): - def _op_func(self): self._set_array() if self.dim == 2: @@ -415,7 +398,8 @@ def _op_func(self): elif self.dim == 1: raise Exception( "This multivariate statistic is only defined when " - "when there are at least 2 dimensions.") + "when there are at least 2 dimensions." + ) raise NotImplementedError( "Statistics can only be calculated for numerical " "data of consistent dimension." @@ -434,12 +418,13 @@ def standardize(self): return (self - self.mean()) / self.std() else: raise Exception("Could not standardize the given results.") - + def tabulate(self, outcomes=None, normalize=False): - return Table(self._get_counts(), outcomes, normalize, "Value") + return Table(self._get_counts(), outcomes, normalize, "Value") - def plot(self, type=None, alpha=None, normalize=True, jitter=False, - bins=None, **kwargs): + def plot( + self, type=None, alpha=None, normalize=True, jitter=False, bins=None, **kwargs + ): if type is not None: if isinstance(type, str): type = (type,) @@ -454,9 +439,9 @@ def plot(self, type=None, alpha=None, normalize=True, jitter=False, counts = self._get_counts() discrete = is_discrete(counts.values()) if type is None: - type = ("impulse", ) if discrete else ("hist", ) + type = ("impulse",) if discrete else ("hist",) if alpha is None: - alpha = .5 + alpha = 0.5 if bins is None: bins = 30 n = len(self) @@ -466,42 +451,49 @@ def plot(self, type=None, alpha=None, normalize=True, jitter=False, ax = plt.gca() color = get_next_color(ax) - if 'density' in type: + if "density" in type: if discrete: xs = sorted(list(counts.keys())) probs = [counts[x] / n for x in xs] - ax.plot(xs, probs, marker='o', color=color, linestyle='-') + ax.plot(xs, probs, marker="o", color=color, linestyle="-") if len(type) == 1: - plt.ylabel('Relative Frequency') + plt.ylabel("Relative Frequency") else: density = compute_density(self.array) xs = np.linspace(self.array.min(), self.array.max(), 1000) ax.plot(xs, density(xs), linewidth=2, color=color) - if len(type) == 1 or (len(type) == 2 and 'rug' in type): - plt.ylabel('Density') - - if 'hist' in type or 'bar' in type: - ax.hist(self.array, bins=bins, density=normalize, - color=color, alpha=alpha, **kwargs) + if len(type) == 1 or (len(type) == 2 and "rug" in type): + plt.ylabel("Density") + + if "hist" in type or "bar" in type: + ax.hist( + self.array, + bins=bins, + density=normalize, + color=color, + alpha=alpha, + **kwargs, + ) plt.ylabel("Density" if normalize else "Count") - elif 'impulse' in type: + elif "impulse" in type: xs = list(counts.keys()) freqs = list(counts.values()) if normalize: freqs = [freq / n for freq in freqs] if jitter: - a = .02 * (max(xs) - min(xs)) + a = 0.02 * (max(xs) - min(xs)) xs = [x + np.random.uniform(low=-a, high=a) for x in xs] # plot the impulses ax.vlines(xs, 0, freqs, color=color, alpha=alpha, **kwargs) - configure_axes(ax, xs, freqs, - ylabel="Relative Frequency" if normalize else "Count") - if 'rug' in type: + configure_axes( + ax, xs, freqs, ylabel="Relative Frequency" if normalize else "Count" + ) + if "rug" in type: xs = self.array if discrete: - noise_level = .002 * (self.array.max() - self.array.min()) + noise_level = 0.002 * (self.array.max() - self.array.min()) xs = xs + np.random.normal(scale=noise_level, size=n) - ax.plot(xs, [0.001] * n, '|', linewidth=5, color='k') + ax.plot(xs, [0.001] * n, "|", linewidth=5, color="k") if len(type) == 1: setup_ticks([], [], ax.yaxis) elif self.dim == 2: @@ -519,38 +511,61 @@ def plot(self, type=None, alpha=None, normalize=True, jitter=False, if type is None: type = ("scatter",) if alpha is None: - alpha = .5 + alpha = 0.5 if bins is None: - bins = 10 if 'tile' in type else 30 + bins = 10 if "tile" in type else 30 - if 'marginal' in type: + if "marginal" in type: fig = plt.gcf() gs = GridSpec(4, 4) ax = fig.add_subplot(gs[1:4, 0:3]) ax_marg_x = fig.add_subplot(gs[0, 0:3]) ax_marg_y = fig.add_subplot(gs[1:4, 3]) color = get_next_color(ax) - if 'density' in type: + if "density" in type: densityX = compute_density(x) densityY = compute_density(y) x_lines = np.linspace(min(x), max(x), 1000) y_lines = np.linspace(min(y), max(y), 1000) - ax_marg_x.plot(x_lines, densityX(x_lines), linewidth=2, - color=get_next_color(ax)) - ax_marg_y.plot(y_lines, densityY(y_lines), linewidth=2, - color=get_next_color(ax), - transform=Affine2D().rotate_deg(270) + ax_marg_y.transData) + ax_marg_x.plot( + x_lines, + densityX(x_lines), + linewidth=2, + color=get_next_color(ax), + ) + ax_marg_y.plot( + y_lines, + densityY(y_lines), + linewidth=2, + color=get_next_color(ax), + transform=Affine2D().rotate_deg(270) + ax_marg_y.transData, + ) else: if discrete_x: - make_marginal_impulse(x_count, get_next_color(ax), ax_marg_x, alpha, 'x') + make_marginal_impulse( + x_count, get_next_color(ax), ax_marg_x, alpha, "x" + ) else: - ax_marg_x.hist(x, color=get_next_color(ax), density=normalize, - alpha=alpha, bins=bins) + ax_marg_x.hist( + x, + color=get_next_color(ax), + density=normalize, + alpha=alpha, + bins=bins, + ) if discrete_y: - make_marginal_impulse(y_count, get_next_color(ax), ax_marg_y, alpha, 'y') + make_marginal_impulse( + y_count, get_next_color(ax), ax_marg_y, alpha, "y" + ) else: - ax_marg_y.hist(y, color=get_next_color(ax), density=normalize, - alpha=alpha, bins=bins, orientation='horizontal') + ax_marg_y.hist( + y, + color=get_next_color(ax), + density=normalize, + alpha=alpha, + bins=bins, + orientation="horizontal", + ) plt.setp(ax_marg_x.get_xticklabels(), visible=False) plt.setp(ax_marg_y.get_yticklabels(), visible=False) else: @@ -558,40 +573,44 @@ def plot(self, type=None, alpha=None, normalize=True, jitter=False, ax = plt.gca() color = get_next_color(ax) - nullfmt = NullFormatter() #removes labels on fig + nullfmt = NullFormatter() # removes labels on fig - if 'scatter' in type: + if "scatter" in type: if jitter: - x = x + np.random.normal(loc=0, scale=.01 * (x.max() - x.min()), size=len(x)) - y = y + np.random.normal(loc=0, scale=.01 * (y.max() - y.min()), size=len(y)) + x = x + np.random.normal( + loc=0, scale=0.01 * (x.max() - x.min()), size=len(x) + ) + y = y + np.random.normal( + loc=0, scale=0.01 * (y.max() - y.min()), size=len(y) + ) ax.scatter(x, y, alpha=alpha, c=color, **kwargs) - elif 'hist' in type: - histo = ax.hist2d(x, y, bins=bins, cmap='Blues') + elif "hist" in type: + histo = ax.hist2d(x, y, bins=bins, cmap="Blues") # When normalize=True, use density instead of counts if normalize: - caxes = add_colorbar(fig, type, histo[3], 'Density') - #change scale to density instead of counts + caxes = add_colorbar(fig, type, histo[3], "Density") + # change scale to density instead of counts plt.draw() new_labels = [] for label in caxes.get_yticklabels(): new_labels.append(int(label.get_text()) / len(x)) caxes.set_yticklabels(new_labels) else: - caxes = add_colorbar(fig, type, histo[3], 'Count') - elif 'density' in type: + caxes = add_colorbar(fig, type, histo[3], "Count") + elif "density" in type: den = make_density2D(x, y, ax) - add_colorbar(fig, type, den, 'Density') - elif 'tile' in type: + add_colorbar(fig, type, den, "Density") + elif "tile" in type: hm = make_tile(x, y, bins, discrete_x, discrete_y, ax) - add_colorbar(fig, type, hm, 'Relative Frequency') - elif 'violin' in type: + add_colorbar(fig, type, hm, "Relative Frequency") + elif "violin" in type: if discrete_x and not discrete_y: positions = sorted(list(x_count.keys())) - make_violin(self.array, positions, ax, 'x', alpha) + make_violin(self.array, positions, ax, "x", alpha) elif not discrete_x and discrete_y: positions = sorted(list(y_count.keys())) - make_violin(self.array, positions, ax, 'y', alpha) + make_violin(self.array, positions, ax, "y", alpha) else: if alpha is None: alpha = np.log(2) / np.log(len(self) + 1) diff --git a/symbulate/table.py b/symbulate/table.py index 4382196..e0443c6 100644 --- a/symbulate/table.py +++ b/symbulate/table.py @@ -5,29 +5,17 @@ the possible outcomes and their counts or relative frequencies. """ from .base import Arithmetic - - -TABLE_TEMPLATE = ''' - - - - - - - {table_body} - -
{outcome_column}{value_column}
-''' - - -def _get_row_html(outcome, count): - return "%s%s" % (outcome, count) +from rich.table import Table as RichTable class Table(dict, Arithmetic): - - def __init__(self, hash_map, outcomes=None, normalize=False, - outcome_column="Outcome"): + def __init__( + self, + hash_map, + outcomes=None, + normalize=False, + outcome_column="Outcome", + ): self.outcomes = outcomes self.outcome_column = outcome_column if outcomes is None: @@ -35,18 +23,15 @@ def __init__(self, hash_map, outcomes=None, normalize=False, self[outcome] = count else: for outcome in outcomes: - self[outcome] = ( - hash_map[outcome] if outcome in hash_map - else 0 - ) - + self[outcome] = hash_map[outcome] if outcome in hash_map else 0 + if normalize: for key in self.ordered_keys(): self[key] /= sum(hash_map.values()) - self.value_column = 'Relative Frequency' + self.value_column = "Relative Frequency" else: - self.value_column = 'Frequency' - + self.value_column = "Frequency" + def ordered_keys(self): # get keys in order if self.outcomes is None: @@ -60,7 +45,7 @@ def ordered_keys(self): keys = self.outcomes return keys - + def __repr__(self): keys = self.ordered_keys() keys_strings = [str(x) for x in keys] @@ -70,63 +55,56 @@ def __repr__(self): for i, key in enumerate(keys): if len(str(key)) <= len(self.outcome_column): - outcome_space = ' ' * (len(self.outcome_column) - len(str(key))) + outcome_space = " " * (len(self.outcome_column) - len(str(key))) else: - outcome_space = ' ' * (max_key_length - len(str(key))) + outcome_space = " " * (max_key_length - len(str(key))) table_rows.append(f"{key}{outcome_space} {self[key]}") if i >= 18: last_outcome = str(keys[-1]) last_value = str(self[keys[-1]]) - table_rows.append(f"{'.' * len(last_outcome)}{outcome_space} " - f"{'.' * len(last_value)}") - table_rows.append(f"{last_outcome}{outcome_space} " - f"{last_value}") + table_rows.append( + f"{'.' * len(last_outcome)}{outcome_space} " + f"{'.' * len(last_value)}" + ) + table_rows.append(f"{last_outcome}{outcome_space} " f"{last_value}") break if max_key_length <= len(self.outcome_column): - outcome_header_space = ' ' - total_row_space = ' ' * (len(self.outcome_column) - len('Total')) + outcome_header_space = " " + total_row_space = " " * (len(self.outcome_column) - len("Total")) else: - outcome_header_space = ' ' * (max_key_length - - len(self.outcome_column) + 1) - total_row_space = ' ' * (max_key_length - len('Total')) + outcome_header_space = " " * (max_key_length - len(self.outcome_column) + 1) + total_row_space = " " * (max_key_length - len("Total")) total = str(sum(self.values())) table_rows.append(f"{total_row_space}Total {total}") - table_rows.insert(0, f"{self.outcome_column}{outcome_header_space}" - f"{self.value_column}") + table_rows.insert( + 0, f"{self.outcome_column}{outcome_header_space}" f"{self.value_column}" + ) - return '\n'.join(table_rows) + return "\n".join(table_rows) - def _repr_html_(self): + def __rich__(self): keys = self.ordered_keys() - # get HTML for table body - table_body = "" + rich_table = RichTable(self.outcome_column, self.value_column) + + num_of_entries = len(keys) for i, key in enumerate(keys): - table_body += _get_row_html(key, self[key]) - # if we've already printed 19 rows, skip to end - if i >= 18: - table_body += _get_row_html("...", "...") - table_body += _get_row_html(keys[-1], self[keys[-1]]) - break - total = str(sum(self.values())) - table_body += _get_row_html("Total", "%s" % total) + last_section = i == num_of_entries - 1 + rich_table.add_row(str(key), str(self[key]), end_section=last_section) - # return HTML for entire table - return TABLE_TEMPLATE.format(outcome_column = self.outcome_column, - value_column = self.value_column, - table_body=table_body) + rich_table.add_row("Total", str(sum(self.values()))) + return rich_table # The Arithmetic superclass will use this to define all of the # usual arithmetic operations (e.g., +, -, *, /, **, ^, etc.). def _operation_factory(self, op): - def _op_func(self, other): return Table( {outcome: op(count, other) for outcome, count in self.items()}, - self.outcomes + self.outcomes, ) return _op_func diff --git a/symbulate/tests/test_distributions.py b/symbulate/tests/test_distributions.py index aed833b..1870fea 100644 --- a/symbulate/tests/test_distributions.py +++ b/symbulate/tests/test_distributions.py @@ -8,19 +8,18 @@ class TestBernoulli(unittest.TestCase): - def test_p_one(self): X = RV(Bernoulli(p=1)) sims = X.sim(Nsim) self.assertTrue(all(sim == 1 for sim in sims)) - + def test_sum(self): exp_list, obs_list = [], [] - X = RV(Bernoulli(p=.4) ** 5) + X = RV(Bernoulli(p=0.4) ** 5) sims = X.apply(sum).sim(Nsim) simulated = sims.tabulate() for k in range(6): - expected = Nsim * stats.binom(n=5, p=.4).pmf(k) + expected = Nsim * stats.binom(n=5, p=0.4).pmf(k) if expected > 5: exp_list.append(expected) obs_list.append(simulated[k]) @@ -28,21 +27,20 @@ def test_sum(self): self.assertTrue(pval > 0.01) def test_Bernoulli_Binomial_n_1(self): - exp_list, obs_list = [], [] - X = RV(Bernoulli(p=.4)) + exp_list, obs_list = [], [] + X = RV(Bernoulli(p=0.4)) sims = X.sim(Nsim) simulated = sims.tabulate() - for k in range(2): - expected = Nsim * stats.binom(n=1, p=.4).pmf(k) + for k in range(2): + expected = Nsim * stats.binom(n=1, p=0.4).pmf(k) if expected > 5: exp_list.append(expected) obs_list.append(simulated[k]) pval = stats.chisquare(obs_list, exp_list).pvalue self.assertTrue(pval > 0.01) - + class TestBinomial(unittest.TestCase): - def test_Binomial_p_1(self): for nsample in range(1, 1000, 100): X = RV(Binomial(n=nsample, p=1.0)) @@ -51,23 +49,22 @@ def test_Binomial_p_1(self): def test_Binomial_error_n(self): self.assertRaises(Exception, lambda: Binomial(n=-10, p=0.4)) - + def test_Binomial_additive(self): exp_list, obs_list = [], [] X, Y = RV(Binomial(n=8, p=0.6) * Binomial(n=5, p=0.6)) sims = (X & Y).sim(Nsim).apply(sum) simulated = sims.tabulate() for k in range(2, 14): - expected = Nsim * stats.binom(n=13, p=.6).pmf(k) + expected = Nsim * stats.binom(n=13, p=0.6).pmf(k) if expected > 5: exp_list.append(expected) obs_list.append(simulated[k]) - pval = stats.chisquare(obs_list, exp_list).pvalue + pval = stats.chisquare(obs_list, exp_list).pvalue self.assertTrue(pval > 0.01) - -class TestHypergeometric(unittest.TestCase): +class TestHypergeometric(unittest.TestCase): def test_Hypergeometric_no_failures(self): X = RV(Hypergeometric(n=10, N0=0, N1=1000)) sims = X.sim(Nsim) @@ -79,7 +76,7 @@ def test_Hypergeometric_Binomial_converge(self): sims = X.sim(Nsim) simulated = sims.tabulate() for k in range(9): - expected = Nsim * stats.binom(n=8, p=.8).pmf(k) + expected = Nsim * stats.binom(n=8, p=0.8).pmf(k) if expected > 5: exp_list.append(expected) obs_list.append(simulated[k]) @@ -87,12 +84,10 @@ def test_Hypergeometric_Binomial_converge(self): self.assertTrue(pval > 0.01) def test_Hypergeometric_error_n_greater(self): - self.assertRaises(Exception, - lambda: Hypergeometric(n=10, N0=1, N1=8)) + self.assertRaises(Exception, lambda: Hypergeometric(n=10, N0=1, N1=8)) class TestGeometric(unittest.TestCase): - def test_Geometric_error(self): self.assertRaises(Exception, lambda: Geometric(p=0)) @@ -102,25 +97,23 @@ def test_Geometric_to_NBinom(self): sims = X.sim(Nsim) simulated = sims.tabulate() for k in range(1, 10): - expected = Nsim * stats.nbinom(n=1,p=0.8).pmf(k-1) + expected = Nsim * stats.nbinom(n=1, p=0.8).pmf(k - 1) if expected > 5: - exp_list.append(expected) - obs_list.append(simulated[k]) + exp_list.append(expected) + obs_list.append(simulated[k]) pval = stats.chisquare(obs_list, exp_list).pvalue self.assertTrue(pval > 0.01) class TestNegativeBinomial(unittest.TestCase): - def test_NBinom_error_r(self): - self.assertRaises(Exception, - lambda: NegativeBinomial(r=-10, p=0.6)) - + self.assertRaises(Exception, lambda: NegativeBinomial(r=-10, p=0.6)) + def test_NBinom_p_1(self): X = NegativeBinomial(r=10, p=1) sims = X.sim(Nsim) self.assertTrue(all(sim == 10 for sim in sims)) - + def test_NBinom_Pascal_additive(self): exp_list, obs_list = [], [] X, Y = RV(Pascal(r=4, p=0.6) * Pascal(r=6, p=0.6)) @@ -129,10 +122,10 @@ def test_NBinom_Pascal_additive(self): for k in range(10, 35): expected = Nsim * stats.nbinom(n=10, p=0.6).pmf(k) if expected > 5: - exp_list.append(expected) + exp_list.append(expected) obs_list.append(simulated[k]) pval = stats.chisquare(obs_list, exp_list).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_NBinom_to_Geometric(self): exp_list, obs_list = [], [] @@ -144,15 +137,14 @@ def test_NBinom_to_Geometric(self): if expected > 5: exp_list.append(expected) obs_list.append(simulated[k]) - pval = stats.chisquare(obs_list, exp_list).pvalue + pval = stats.chisquare(obs_list, exp_list).pvalue self.assertTrue(pval > 0.01) class TestPascal(unittest.TestCase): - def test_Pascal_error_r(self): self.assertRaises(Exception, lambda: Pascal(r=0, p=0.3)) - + def test_Pascal_p_1(self): X = Pascal(r=10, p=1.0) sims = X.sim(Nsim) @@ -160,7 +152,6 @@ def test_Pascal_p_1(self): class TestPoisson(unittest.TestCase): - def test_Poisson_error(self): self.assertRaises(Exception, lambda: Poisson(lam=0)) @@ -175,12 +166,12 @@ def test_Poisson_additive(self): exp_list.append(expected) obs_list.append(simulated[k]) pval = stats.chisquare(obs_list, exp_list).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_conditional_Poisson_add(self): obs_list, exp_list = [], [] - X,Y = RV(Poisson(lam=6) * Poisson(lam=7)) - sims = (X|(X+Y == 12)).sim(Nsim) + X, Y = RV(Poisson(lam=6) * Poisson(lam=7)) + sims = (X | (X + Y == 12)).sim(Nsim) simulated = sims.tabulate() for k in range(12): expected = Nsim * stats.binom(n=12, p=6 / 13).pmf(k) @@ -188,11 +179,10 @@ def test_conditional_Poisson_add(self): exp_list.append(expected) obs_list.append(simulated[k]) pval = stats.chisquare(obs_list, exp_list).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestUniform(unittest.TestCase): - def test_Uniform_error(self): self.assertRaises(Exception, lambda: Uniform(a=6, b=-1)) @@ -208,26 +198,26 @@ def test_Uniform_to_ChiSquare(self): sims = (-2 * log(X)).sim(Nsim) cdf = stats.chi2(df=2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Uniform_to_Exponential(self): X = RV(Uniform(a=0, b=1)) Y = -1 / 5 * log(X) sims = Y.sim(Nsim) - cdf = stats.expon(scale=1/5).cdf + cdf = stats.expon(scale=1 / 5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Uniform_to_Beta(self): X = RV(Uniform(a=0, b=1)) sims = (X ** 15).sim(Nsim) - cdf = stats.beta(a=1/15, b=1).cdf + cdf = stats.beta(a=1 / 15, b=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Uniform_to_Cauchy(self): X = RV(Uniform(a=0, b=1)) - sims = (pi * (X - 1/2)).apply(tan).sim(Nsim) + sims = (pi * (X - 1 / 2)).apply(tan).sim(Nsim) cdf = stats.cauchy(loc=0, scale=1).cdf pval = stats.kstest(sims, cdf).pvalue self.assertTrue(pval > 0.01) @@ -237,34 +227,33 @@ def test_Uniform_to_Pareto(self): sims = (2 * X ** (-1 / 0.1)).sim(10000) cdf = stats.pareto(b=0.1, loc=0, scale=2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) - -class TestNormal(unittest.TestCase): + self.assertTrue(pval > 0.01) + +class TestNormal(unittest.TestCase): def test_Normal_error(self): - self.assertRaises(Exception, lambda: Normal(mean=0, var=-10)) - + self.assertRaises(Exception, lambda: Normal(mean=0, var=-10)) + def test_sum(self): X = RV(Normal(mean=-1, sd=2) ** 3) sims = X.apply(sum).sim(Nsim) - cdf = stats.norm(loc=-3, - scale=np.sqrt(12)).cdf + cdf = stats.norm(loc=-3, scale=np.sqrt(12)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_sum_Standard_Normal(self): X, Y = RV(Normal(mean=0, var=1) ** 2) sims = (X + Y).sim(Nsim) cdf = stats.norm(loc=0, scale=sqrt(2)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_subtract_Standard_Normal(self): X, Y = RV(Normal(mean=0, var=1) ** 2) sims = (X - Y).sim(Nsim) cdf = stats.norm(loc=0, scale=sqrt(2)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_standardize(self): X = RV(Normal(mean=8, var=4)) @@ -272,7 +261,7 @@ def test_Normal_standardize(self): sims = X_stand.sim(Nsim) cdf = stats.norm(loc=0, scale=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_standardize_to_Normal(self): Z = RV(Normal(mean=0, sd=1)) @@ -280,15 +269,15 @@ def test_standardize_to_Normal(self): sims = X.sim(Nsim) cdf = stats.norm(loc=10, scale=5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_to_Gamma(self): X = RV(Normal(mean=0, var=1)) X = X ** 2 sims = X.sim(Nsim) - cdf = stats.gamma(a=1/2, scale=2).cdf + cdf = stats.gamma(a=1 / 2, scale=2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_to_ChiSquare(self): X = RV(Normal(mean=0, var=1)) @@ -296,86 +285,86 @@ def test_Normal_to_ChiSquare(self): sims = X.sim(Nsim) cdf = stats.chi2(df=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_to_Cauchy(self): X, Y = RV(Normal(mean=0, var=1) ** 2) sims = (X / Y).sim(Nsim) cdf = stats.cauchy(loc=0, scale=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_to_F(self): A, B, C, V, W, X, Y, Z = RV(Normal(mean=0, var=1) ** 8) - sims = ((((A ** 2) + (B ** 2) + (C ** 2)) / 3) / - (((V ** 2) + (W ** 2) + (X ** 2) + (Y ** 2) + (Z ** 2)) - / 5)).sim(Nsim) + sims = ( + (((A ** 2) + (B ** 2) + (C ** 2)) / 3) + / (((V ** 2) + (W ** 2) + (X ** 2) + (Y ** 2) + (Z ** 2)) / 5) + ).sim(Nsim) cdf = stats.f(dfn=3, dfd=5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_sum_Normal_to_ChiSquare(self): X, Y, Z, A, B = RV(Normal(mean=0, var=1) ** 5) sims = ((X ** 2) + (Y ** 2) + (Z ** 2) + (A ** 2) + (B ** 2)).sim(Nsim) cdf = stats.chi2(df=5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) + - class TestExponential(unittest.TestCase): - def test_Exponential_error(self): self.assertRaises(Exception, lambda: Exponential(rate=-5)) def test_Exponential_to_Gamma(self): X = RV(Exponential(rate=0.9)) sims = X.sim(Nsim) - cdf = stats.gamma(scale=1/0.9, a=1).cdf + cdf = stats.gamma(scale=1 / 0.9, a=1).cdf pval = stats.kstest(sims, cdf).pvalue self.assertTrue(pval > 0.01) def test_Exponential_sum_Gamma(self): X, Y, Z, A = RV(Exponential(rate=0.9) ** 4) sims = (X + Y + Z + A).sim(Nsim) - cdf = stats.gamma(scale=1/0.9, a=4).cdf + cdf = stats.gamma(scale=1 / 0.9, a=4).cdf pval = stats.kstest(sims, cdf).pvalue self.assertTrue(pval > 0.01) def test_Exponential_to_ChiSquare(self): - X = RV(Exponential(rate=1/2)) + X = RV(Exponential(rate=1 / 2)) sims = X.sim(Nsim) cdf = stats.chi2(df=2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Exponential_to_Pareto(self): X = RV(Exponential(rate=2)) sims = (3 * exp(X)).sim(Nsim) cdf = stats.pareto(b=2, loc=0, scale=3).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Exponential_to_Weibull(self): X = RV(Exponential(rate=5)) sims = X.sim(Nsim) - cdf= stats.frechet_r(scale=1/5, c=1).cdf + cdf = stats.frechet_r(scale=1 / 5, c=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Exponential_to_Rayleigh(self): X = RV(Exponential(rate=5)) sims = sqrt(X).sim(Nsim) cdf = stats.rayleigh(scale=1 / sqrt(2 * 5)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Poisson_Exponential_to_Geometric(self): def poisson_exp(): - x = Exponential(rate=1 / lam).draw() + x = Exponential(rate=1 / lam).draw() z = RV(Poisson(x)).draw() return z - exp_list, obs_list, lam = [], [], 1/5 + exp_list, obs_list, lam = [], [], 1 / 5 P = ProbabilitySpace(poisson_exp) A = RV(P) sims = (A + 1).sim(Nsim) @@ -386,11 +375,10 @@ def poisson_exp(): exp_list.append(expected) obs_list.append(simulated[k]) pval = stats.chisquare(obs_list, exp_list).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestGamma(unittest.TestCase): - def test_Gamma_shape_error(self): self.assertRaises(Exception, lambda: Gamma(shape=-5, rate=40)) @@ -398,51 +386,49 @@ def test_Gamma_rate_error(self): self.assertRaises(Exception, lambda: Gamma(shape=4, rate=-10)) def test_Gamma_to_Exponential(self): - X = Gamma(shape=1, rate=1/ 0.9) + X = Gamma(shape=1, rate=1 / 0.9) sims = X.sim(Nsim) cdf = stats.expon(scale=0.9).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) - + self.assertTrue(pval > 0.01) + def test_Gamma_reshape(self): X = RV(Gamma(shape=9, scale=4)) sims = (X * 8).sim(Nsim) cdf = stats.gamma(scale=4 * 8, a=9).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) - + self.assertTrue(pval > 0.01) + def test_Gamma_additive(self): - X, Y = RV(Gamma(shape=10, scale=0.5) * - Gamma(shape=8, scale=0.5)) + X, Y = RV(Gamma(shape=10, scale=0.5) * Gamma(shape=8, scale=0.5)) sims = (X + Y).sim(Nsim) cdf = stats.gamma(scale=0.5, a=18).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Gamma_to_Beta(self): X, Y = RV(Gamma(shape=5, scale=8) * Gamma(shape=4, scale=8)) sims = (X / (X + Y)).sim(Nsim) cdf = stats.beta(a=5, b=4).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Gamma_to_F(self): X, Y = RV(Gamma(shape=2, rate=5) * Gamma(shape=4, rate=7)) sims = ((4 * 5 * X) / (2 * 7 * Y)).sim(Nsim) cdf = stats.f(dfn=2 * 2, dfd=2 * 4).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Gamma_to_ChiSquare(self): - X = RV(Gamma(shape=10/2, scale=2)) + X = RV(Gamma(shape=10 / 2, scale=2)) sims = X.sim(Nsim) cdf = stats.chi2(df=10).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestBeta(unittest.TestCase): - def test_Beta_error_a(self): self.assertRaises(Exception, lambda: Beta(a=-10, b=3)) @@ -454,59 +440,57 @@ def test_Beta_to_Uniform(self): sims = X.sim(Nsim) cdf = stats.uniform(loc=0, scale=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Beta_symmetry(self): X = RV(Beta(a=4, b=5)) sims = (1 - X).sim(Nsim) cdf = stats.beta(a=5, b=4).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Beta_to_Exponential(self): X = RV(Beta(a=0.7, b=1)) sims = (-log(X)).sim(Nsim) cdf = stats.expon(scale=1 / 0.7).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Beta_to_F(self): - X = RV(Beta(a=10/2, b=12/2)) + X = RV(Beta(a=10 / 2, b=12 / 2)) sims = (12 * X / (10 * (1 - X))).sim(Nsim) cdf = stats.f(dfn=10, dfd=12).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestStudentT(unittest.TestCase): - def test_StudentT_df_error(self): self.assertRaises(Exception, lambda: StudentT(df=0)) def test_StudentT_to_Normal(self): X = StudentT(df=Nsim) sims = X.sim(Nsim) - cdf = stats.norm(loc=0,scale=1).cdf + cdf = stats.norm(loc=0, scale=1).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_ChiSquare_to_StudentT(self): X, Y = RV(Normal(mean=0, var=1) * ChiSquare(df=5)) sims = (X / sqrt(Y / 5)).sim(Nsim) cdf = stats.t(df=5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestChiSquare(unittest.TestCase): - def test_ChiSquare_error(self): self.assertRaises(Exception, lambda: ChiSquare(df=0.5)) def test_ChiSquare_to_Gamma(self): X = RV(ChiSquare(df=10)) sims = X.sim(Nsim) - cdf = stats.gamma(a=5, scale=1/0.5).cdf + cdf = stats.gamma(a=5, scale=1 / 0.5).cdf pval = stats.kstest(sims, cdf).pvalue self.assertTrue(pval > 0.01) @@ -515,18 +499,17 @@ def test_ChiSquare_to_F(self): sims = ((X / 3) / (Y / 5)).sim(Nsim) cdf = stats.f(dfn=3, dfd=5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_ChiSquare_to_Beta(self): X, Y = RV(ChiSquare(df=4) * ChiSquare(df=5)) sims = (X / (X + Y)).sim(Nsim) - cdf = stats.beta(a=4/2, b=5/2).cdf + cdf = stats.beta(a=4 / 2, b=5 / 2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) - + self.assertTrue(pval > 0.01) -class TestF(unittest.TestCase): +class TestF(unittest.TestCase): def test_F_error(self): self.assertRaises(Exception, lambda: F(dfN=0, dfD=5)) @@ -535,25 +518,24 @@ def test_inverse_T(self): sims = (1 / X).sim(Nsim) cdf = stats.f(dfn=8, dfd=4).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_StudentT_to_F(self): X = RV(StudentT(df=15)) sims = (X ** 2).sim(Nsim) cdf = stats.f(dfn=1, dfd=15).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_F_to_Beta(self): X = RV(F(dfN=5, dfD=8)) sims = ((5 * X / 8) / (1 + (5 * X / 8))).sim(Nsim) - cdf = stats.beta(a=5/2, b=8/2).cdf + cdf = stats.beta(a=5 / 2, b=8 / 2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestCauchy(unittest.TestCase): - def test_Cauchy_mean(self): X = Cauchy() math.isnan(X.mean()) @@ -573,7 +555,7 @@ def test_Cauchy_inverse(self): self.assertTrue(pval > 0.01) def test_Cauchy_additive(self): - X, Y = RV(Cauchy()**2) + X, Y = RV(Cauchy() ** 2) sims = (X + Y).sim(Nsim) cdf = stats.cauchy(loc=0, scale=2).cdf pval = stats.kstest(sims, cdf).pvalue @@ -581,93 +563,95 @@ def test_Cauchy_additive(self): class TestLognormal(unittest.TestCase): - def test_LogNormal_error(self): - self.assertRaises(Exception, lambda: LogNormal(mu=0, sigma=-5)) + self.assertRaises(Exception, lambda: LogNormal(mu=0, sigma=-5)) def test_LogNormal_to_Normal(self): X = LogNormal(mu=10, sigma=5) sims = X.sim(Nsim).apply(log) cdf = stats.norm(loc=10, scale=5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Normal_to_LogNormal(self): X = RV(Normal(mean=10, sd=5)) sims = X.apply(exp).sim(Nsim) cdf = stats.lognorm(s=5, scale=exp(10)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_LogNormal_Product(self): X, Y = RV(LogNormal(mu=10, sigma=5) * LogNormal(mu=11, sigma=6)) sims = (X * Y).sim(Nsim) cdf = stats.lognorm(s=sqrt(25 + 36), scale=exp(21)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) -class TestPareto(unittest.TestCase): +class TestPareto(unittest.TestCase): def test_Pareto_check_mean(self): - x = stats.pareto(b=-3) + x = stats.pareto(b=-3) math.isnan(x.mean()) def test_Pareto_to_Exponential(self): X = RV(Pareto(b=1.5, scale=0.1)) sims = (log(X / 0.1)).sim(Nsim) - cdf = stats.expon(scale=1/1.5).cdf + cdf = stats.expon(scale=1 / 1.5).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestWeibull(unittest.TestCase): def test_Weibull_to_Exponential(self): - '''X = RV(Weibull(scale=1, c=10)) - sims = X.sim(Nsim) - cdf = stats.expon(scale=1/10).cdf - pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) - ''' + """X = RV(Weibull(scale=1, c=10)) + sims = X.sim(Nsim) + cdf = stats.expon(scale=1/10).cdf + pval = stats.kstest(sims, cdf).pvalue + self.assertTrue(pval > .01) + """ pass -class TestRayleigh(unittest.TestCase): +class TestRayleigh(unittest.TestCase): def test_Rayleigh_Normal(self): A, B = RV(Normal(mean=0, var=1) * Normal(mean=0, var=1)) sims = (A ** 2 + B ** 2).apply(sqrt).sim(Nsim) cdf = stats.rayleigh.cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) def test_Rayleigh_to_Chi(self): X = RV(Rayleigh()) sims = X.sim(Nsim) cdf = stats.chi(df=2).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) + self.assertTrue(pval > 0.01) class TestMultivariateNormal(unittest.TestCase): - def test_MultivariateNormal_mean_cov_error(self): - self.assertRaises(Exception, - lambda: MultivariateNormal(mean=[2], cov=[[2,2], [3,4]])) + self.assertRaises( + Exception, lambda: MultivariateNormal(mean=[2], cov=[[2, 2], [3, 4]]) + ) def test_MultivariateNormal_cov_square_error(self): - self.assertRaises(Exception, - lambda: MultivariateNormal(mean=[2,4], cov=[[2,4,5],[2,1]])) + self.assertRaises( + Exception, lambda: MultivariateNormal(mean=[2, 4], cov=[[2, 4, 5], [2, 1]]) + ) class TestBivariateNormal(unittest.TestCase): - def test_BivariateNormal_error1(self): - self.assertRaises(Exception, lambda: - BivariateNormal(mean1=3, mean2=4, sd1=-3, sd2=3, corr=0.9)) + self.assertRaises( + Exception, + lambda: BivariateNormal(mean1=3, mean2=4, sd1=-3, sd2=3, corr=0.9), + ) def test_BivariateNormal_error2(self): - self.assertRaises(Exception, lambda: - BivariateNormal(mean1=3, mean2=4, sd1=3, sd2=3, corr=1.1)) - + self.assertRaises( + Exception, lambda: BivariateNormal(mean1=3, mean2=4, sd1=3, sd2=3, corr=1.1) + ) + def test_LinCom_BivNormal(self): X, Y = RV(BivariateNormal(mean1=30, mean2=50, sd1=8, sd2=6, corr=-0.4)) Z = 4 * X - 2 * Y @@ -676,15 +660,14 @@ def test_LinCom_BivNormal(self): sims = Z.sim(Nsim) cdf = stats.norm(loc=Z_mean, scale=sqrt(Z_var)).cdf pval = stats.kstest(sims, cdf).pvalue - self.assertTrue(pval > .01) - + self.assertTrue(pval > 0.01) + def test_BivNormal_condDistr_r(self): for c in [-0.9, 0.9, 0.1]: - X,Y = RV(BivariateNormal(mean1=20, mean2=10, sd1=3, sd2=5, corr=c)) + X, Y = RV(BivariateNormal(mean1=20, mean2=10, sd1=3, sd2=5, corr=c)) sims = (Y | (abs(X - 21) < 0.1)).sim(500) - cdf = stats.norm(loc=10 + c * 5 / 3 * (21 - 20), - scale=5 * sqrt(1 - c ** 2)).cdf + cdf = stats.norm( + loc=10 + c * 5 / 3 * (21 - 20), scale=5 * sqrt(1 - c ** 2) + ).cdf pval = stats.kstest(sims, cdf).pvalue self.assertTrue(pval > 0.01) - -