From f8c5a8dad1a5b8ac24481bce2abfa6ec1eb2d19d Mon Sep 17 00:00:00 2001 From: YURYVOM <112267599+YURYVOM@users.noreply.github.com> Date: Mon, 15 Jun 2026 09:46:39 -0500 Subject: [PATCH 1/3] Initial upload TeachingSampling 4.1.1 from CRAN --- DESCRIPTION | 12 +++--- MD5 | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++ data/datalist | 3 ++ 3 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 MD5 create mode 100644 data/datalist diff --git a/DESCRIPTION b/DESCRIPTION index 875eb3d..c09f3f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,17 @@ Package: TeachingSampling Type: Package -Title: Selection of Samples and Parameter Estimation in Finite Population +Title: Selection of Samples and Parameter Estimation in Finite + Population License: GPL (>= 2) Version: 4.1.1 Date: 2020-04-21 Author: Hugo Andres Gutierrez Rojas Maintainer: Hugo Andres Gutierrez Rojas -Depends: - R (>= 3.5), - dplyr, - magrittr +Depends: R (>= 3.5), dplyr, magrittr Description: Allows the user to draw probabilistic samples and make inferences from a finite population based on several sampling designs. Encoding: UTF-8 RoxygenNote: 7.1.0 +NeedsCompilation: no +Packaged: 2020-04-21 19:54:19 UTC; psirusteam +Repository: CRAN +Date/Publication: 2020-04-21 21:50:03 UTC diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..580427f --- /dev/null +++ b/MD5 @@ -0,0 +1,109 @@ +66358cf7c53024ec81193d1dc3e0569b *DESCRIPTION +03a489847e34cfad29dcecf522546ad2 *NAMESPACE +f938e9e6702d4b0fa52b95d65aac141d *R/Deltakl.r +c26e89bf42e9d40f70570d3fa40a27c2 *R/Domains.r +40b73b94a4acf46b5722c835180306ee *R/E.1SI.R +116bb5940489342e04f10c1391d3a1ea *R/E.2SI.r +3993547547c42677d574b67e068ff1c6 *R/E.BE.r +74f73575e0674b0a511cec8fd5cebac6 *R/E.Beta.r +c5878e4f40b41fe5bdc049f2107655e8 *R/E.PO.r +fa086957dba0dc9461dd9afe6e29f472 *R/E.PPS.r +07d9f98380516cc37354931084ef0bbb *R/E.Quantile.r +b74b6fe31eef3a24f02da19c0709acc4 *R/E.SI.r +324cbb9d93937453dcad24b8898595d1 *R/E.STPPS.r +dd065f462d96b6dd51f23c79aa50582a *R/E.STSI.r +765918d8ac225e08327ae7d4a46474c9 *R/E.STpiPS.R +1d33422cba213f7f84f7ad08d6ca4c25 *R/E.SY.r +99f05cd36aa35e1a9ad2ce5e76549ed9 *R/E.Trim.R +1d2cd45881fe7bbc1c1bdf32baf09cdf *R/E.UC.R +8a2064359292bed6b2f2fc7474924db8 *R/E.WR.r +71eab9e832f9f3c818c8322878210457 *R/E.piPS.r +81b8bebb55cafaa7ee71dfe97a17da86 *R/GREG.SI.r +d9f8cb725f40d590c46474f55510926a *R/HH.r +7b872d442bc06ce59f9eebba986a5ef9 *R/HT.r +c89ad9b876a6968a859f59d09e0bfb41 *R/IPFP.r +4e1b0efff52308be2e76063169f14088 *R/Ik.r +e9f8c7793012416bc96749d06f881fb6 *R/IkRS.r +d6fbf9f7755197d5e01503b2779afcd0 *R/IkWR.r +dfe0453ed1492f48ecaabe5b07041be8 *R/OrderWR.r +2d562377091b4531f905002e1640564c *R/Pik.r +2d74e04c4c4b57505dbba5c33329f843 *R/PikHol.r +69c6c30af63a9700e6eae2a1abba9dff *R/PikPPS.r +17a2b362f02172eb7d2989373653ac4c *R/PikSTPPS.R +05379e9c1a66539d5f5c87039eb02d40 *R/Pikl.r +ef087cb60669f4a4c5ebd3476e9fa965 *R/S.BE.r +d67fac70bc69c91016ba12e4bbdb1d8a *R/S.PO.r +622fe8c6fa82c177e29e4e2bc391e42c *R/S.PPS.r +88994b84bc62e87fe27c6c99bd1cc7f0 *R/S.SI.r +3abe556531d56ce0784cb405cb328f8b *R/S.STPPS.r +5fef5aa981fc5448f21172134a03bad1 *R/S.STSI.r +58bc25b3f07d5fa2ddf1f55f517b49b9 *R/S.STpiPS.R +f062ea7a9c012873582593787987935f *R/S.SY.r +460e9a56bc05f70c33c33d676d53535d *R/S.WR.r +7af51e2628c17740d178b8b049114ef3 *R/S.piPS.r +377734cf568ed76166ee12e3226a2ae5 *R/Support.r +2fabe9d36da6f7feb5c3d39766a56f0e *R/SupportRS.r +001fb29fdd6e52e5fa0d40a361766bf9 *R/SupportWR.r +d750f48140381578e0bed68a28d52291 *R/T.SIC.r +7792dde0176e7325f6a7b68ee5975cf4 *R/VarHT.r +b1d88100a4b0461efc5848e279fe3dc1 *R/VarSYGHT.R +1b6177a35af9e154dab1fd22bd0d17fd *R/Wk.r +191ae0699d5dcd03af1ec13531aadb9a *R/nk.r +de888f35e509b387912e6050f39ed02e *R/p.WR.r +314e416cba9e6e087c234168f66c8414 *data/BigCity.RData +026dccba4d83fd7af4ae0959ce0b3a08 *data/BigLucy.rda +f85dfd8953fdc676d9e72ddae77d7d7c *data/Lucy.rda +4eaa2fcb1e8e43987bbe08240d382333 *data/datalist +9033d529a72a540333603708c41a7206 *man/BigCity.Rd +b789dba9b29d1abbc70f18c3889194cc *man/BigLucy.rd +b04673eca685a0f8ffdead3c99fd6119 *man/Deltakl.rd +157b1ac0e2e6d06a6e84016846506776 *man/Domains.rd +674b3c42319ecce2bad8839759b976a2 *man/E.1SI.Rd +effee56d0e3b263666a1e9e376ae8dc8 *man/E.2SI.rd +8d3233f1eee3bbe26e0d7038c0c70695 *man/E.BE.rd +611584d4132e1e87c0980d57ff58f011 *man/E.Beta.rd +a33aded02f9a922e542056c1d1d4b326 *man/E.PO.rd +37a83c27a51980fb31674448fa38a2db *man/E.PPS.rd +e5acc6bb09a2be266fc2f2d88eb27a79 *man/E.Quantile.rd +a6cfa5afed72295f8b61ddb2df9c65ef *man/E.SI.rd +0c9e1c8ac9423945742a875c08204921 *man/E.STPPS.rd +69b32ef51bccd2fbb219d6fb3c3c4e1a *man/E.STSI.rd +91f669f181b4fa5f7e4d7cf195e57b82 *man/E.STpiPS.Rd +b7673dda03958bf93b0d36dfbab9dc7c *man/E.SY.rd +18d48f27943cfcd1e7d19ad4f800d70c *man/E.Trim.Rd +7917bf3e94e5ebcd1099b5ca9201f70e *man/E.UC.Rd +2b311f1513524342923cc79ad1c6ad02 *man/E.WR.rd +ad18141f5df3a7c89484b1eded8dbf40 *man/E.piPS.rd +ca9123319cb2441952045b3345047dde *man/GREG.SI.rd +1884a7c9ea1b6897b52dda5b6ecb30bd *man/HH.rd +a80ca4c3d3e9c2cb9795066c617a042f *man/HT.rd +49afa6f71a05896264f189dc762699a0 *man/IPFP.rd +3e86adedcafb799683c80c15fd38b391 *man/Ik.rd +d4dd45b93c85b62f98e9c9c453c126cf *man/IkRS.rd +cf4caeac77c2005242c8f0d92829cb6e *man/IkWR.rd +bc1f82f2e43f38b8179148e644d66c33 *man/Lucy.rd +3ecc10be567e4caf48150b3bfbaa5cee *man/OrderWR.rd +23db45af738633074d96716167955543 *man/Pik.rd +344b2e01eda67cb995e2b93c25a5e3d8 *man/PikHol.rd +2262907a39785be07991c435f6f4ca16 *man/PikPPS.rd +01bed42e6eba9feca3041c00109445a6 *man/PikSTPPS.Rd +d893a402195006d0a7f5dfdaaf7f4f62 *man/Pikl.rd +153c1db83965ddc84cd6c591f927260f *man/S.BE.rd +a80b1f069dc66d2d9a1ceeec2002eacf *man/S.PO.rd +b98bc055ab1e176f812c14378d9c138d *man/S.PPS.rd +059c41796b4c2240b76fed2f90236106 *man/S.SI.rd +91ed9705e66a07755084eada3061ade2 *man/S.STPPS.rd +462b79ded52c76bb448796dc73412908 *man/S.STSI.rd +575b90af4f9bf0220114d3f61df3e9e5 *man/S.STpiPS.Rd +01d2ce9f1adc47f3152aacfbd49b3960 *man/S.SY.rd +6165fa6e4aba2ee1372cbdd806d746b4 *man/S.WR.rd +93cb3c2c714c71f5cbbb41ff18a07311 *man/S.piPS.rd +c5aed92af685317377d5dd2d9840bd66 *man/Support.rd +9b50c250efae5c9b213fe8a80053d7b8 *man/SupportRS.rd +ff19964b2f0fe87f38a1c5151f19b8eb *man/SupportWR.rd +0e90c70f5b928423c54409727ad0f8fa *man/T.SIC.rd +c4b964cbf99eb877ae9b76cbdef964c3 *man/VarHT.rd +cd2eece3bbf341c9086429ead6f569b3 *man/VarSYGHT.Rd +b34f77a1d5b9bd3defd66a1a7321f49f *man/Wk.rd +2cbe6fb4bad1dbb5db9c1dcf8bb1544f *man/nk.rd +8c0d3ae9dbdfffafa814e14db4f2ef7e *man/p.WR.rd diff --git a/data/datalist b/data/datalist new file mode 100644 index 0000000..9b7de3d --- /dev/null +++ b/data/datalist @@ -0,0 +1,3 @@ +BigCity +BigLucy +Lucy From 1ea1da45f48494dba80d5b0bb8dea8ed76ad9596 Mon Sep 17 00:00:00 2001 From: YURYVOM <112267599+YURYVOM@users.noreply.github.com> Date: Mon, 15 Jun 2026 10:31:17 -0500 Subject: [PATCH 2/3] add kish_allocation function and complete Roxygen2 documentation --- DESCRIPTION | 32 ++-- MD5 | 109 -------------- NAMESPACE | 1 + R/Deltakl.r | 51 ++++++- R/Domains.r | 58 ++++++-- R/E.2SI.r | 112 +++++++++++--- R/E.BE.r | 72 +++++++-- R/E.Beta.r | 116 ++++++++++----- R/E.PO.r | 73 +++++++-- R/E.PPS.r | 70 +++++++-- R/E.Quantile.r | 105 ++++++++----- R/E.SI.r | 72 +++++++-- R/E.SY.r | 71 +++++++-- R/E.WR.r | 71 +++++++-- R/E.piPS.r | 91 +++++++++--- R/GREG.SI.r | 91 +++++++++--- R/HT.r | 51 ++++++- R/IPFP.r | 102 +++++++++---- R/Ik.r | 59 ++++++-- R/IkRS.r | 44 +++++- R/IkWR.r | 61 ++++++-- R/OrderWR.r | 119 +++++++++------ R/Pik.r | 51 ++++++- R/PikHol.r | 64 ++++++-- R/PikPPS.r | 62 ++++++-- R/Pikl.r | 68 ++++++--- R/S.BE.r | 60 +++++++- R/S.PO.r | 53 ++++++- R/S.PPS.r | 63 ++++++-- R/S.SI.r | 63 ++++++-- R/S.STPPS.r | 107 ++++++++----- R/S.STSI.r | 72 ++++++--- R/S.STpiPS.R | 80 +++++++--- R/S.SY.r | 63 ++++++-- R/S.WR.r | 70 ++++++--- R/Support.r | 55 +++++-- R/SupportRS.r | 56 +++++-- R/SupportWR.r | 98 +++++++----- R/T.SIC.r | 78 +++++++--- R/VarHT.r | 60 ++++++-- R/Wk.r | 72 +++++++-- R/kish_allocation.R | 89 +++++++++++ R/nk.r | 58 ++++++-- R/p.WR.r | 55 +++++-- README.md | 162 ++++++++++++++++++-- man/BigCity.Rd | 52 ------- man/BigLucy.rd | 57 ------- man/Deltakl.rd | 66 +++++---- man/Domains.rd | 95 +++++------- man/E.2SI.rd | 199 +++++++++---------------- man/E.BE.rd | 75 ++++++---- man/E.Beta.rd | 152 +++++++------------ man/E.PO.rd | 78 +++++----- man/E.PPS.rd | 76 +++++----- man/E.Quantile.rd | 112 ++++++-------- man/E.SI.rd | 145 ++++++------------ man/E.STPPS.rd | 57 ------- man/E.STSI.rd | 73 --------- man/E.STpiPS.Rd | 62 -------- man/E.SY.rd | 78 ++++++---- man/E.WR.rd | 78 +++++----- man/E.piPS.rd | 85 ++++++----- man/GREG.SI.rd | 226 ++++++++-------------------- man/HH.rd | 131 ---------------- man/HT.rd | 330 +++++------------------------------------ man/IPFP.rd | 112 ++++++-------- man/Ik.rd | 58 +++++--- man/IkRS.rd | 55 ++++--- man/IkWR.rd | 58 +++++--- man/Lucy.rd | 54 ------- man/OrderWR.rd | 86 +++++------ man/Pik.rd | 72 +++++---- man/PikHol.rd | 141 ++++++------------ man/PikPPS.rd | 105 +++++-------- man/Pikl.rd | 68 +++++---- man/S.BE.rd | 92 ++++++------ man/S.PO.rd | 88 +++++------ man/S.PPS.rd | 88 ++++++----- man/S.SI.rd | 96 ++++++------ man/S.STPPS.rd | 110 ++++++-------- man/S.STSI.rd | 99 ++++++------- man/S.STpiPS.Rd | 123 +++++++-------- man/S.SY.rd | 81 +++++----- man/S.WR.rd | 76 +++++----- man/S.piPS.rd | 60 -------- man/Support.rd | 74 +++++---- man/SupportRS.rd | 66 +++++---- man/SupportWR.rd | 74 ++++----- man/T.SIC.rd | 112 ++++++-------- man/VarHT.rd | 73 +++++---- man/Wk.rd | 227 +++++++--------------------- man/kish_allocation.Rd | 71 +++++++++ man/nk.rd | 54 ++++--- man/p.WR.rd | 93 ++++++------ 94 files changed, 4326 insertions(+), 3757 deletions(-) delete mode 100644 MD5 create mode 100644 R/kish_allocation.R delete mode 100644 man/BigCity.Rd delete mode 100644 man/BigLucy.rd delete mode 100644 man/E.STPPS.rd delete mode 100644 man/E.STSI.rd delete mode 100644 man/E.STpiPS.Rd delete mode 100644 man/HH.rd delete mode 100644 man/Lucy.rd delete mode 100644 man/S.piPS.rd create mode 100644 man/kish_allocation.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c09f3f7..21045fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,27 @@ Package: TeachingSampling Type: Package -Title: Selection of Samples and Parameter Estimation in Finite - Population +Title: Selection of Samples and Parameter Estimation in Finite Population +Version: 4.2.0 +Date: 2026-06-15 +Authors@R: c( + person("Hugo Andres", "Gutierrez Rojas", + email = "hagutierrezro@gmail.com", + role = c("aut", "cre")), + person("Yury Vanessa", "Ochoa Montes", + email = "yury.ochoa@urosario.edu.co", + role = "ctb", + comment = "kish_allocation function")) +Description: Allows the user to draw probabilistic samples and make + inferences from a finite population based on several sampling designs, + including simple random, systematic, Bernoulli, Poisson, PPS, + stratified, and cluster sampling. Provides Horvitz-Thompson, + Hansen-Hurwitz, and generalised regression (GREG) estimators of + totals, means, ratios, regression coefficients, and quantiles, + along with exact and approximate variance estimators. License: GPL (>= 2) -Version: 4.1.1 -Date: 2020-04-21 -Author: Hugo Andres Gutierrez Rojas -Maintainer: Hugo Andres Gutierrez Rojas Depends: R (>= 3.5), dplyr, magrittr -Description: Allows the user to draw probabilistic samples and make inferences from a finite population based on several sampling designs. Encoding: UTF-8 -RoxygenNote: 7.1.0 NeedsCompilation: no -Packaged: 2020-04-21 19:54:19 UTC; psirusteam -Repository: CRAN -Date/Publication: 2020-04-21 21:50:03 UTC +URL: https://github.com/psirusteam/TeachingSampling +BugReports: https://github.com/psirusteam/TeachingSampling/issues +Config/roxygen2/version: 8.0.0 diff --git a/MD5 b/MD5 deleted file mode 100644 index 580427f..0000000 --- a/MD5 +++ /dev/null @@ -1,109 +0,0 @@ -66358cf7c53024ec81193d1dc3e0569b *DESCRIPTION -03a489847e34cfad29dcecf522546ad2 *NAMESPACE -f938e9e6702d4b0fa52b95d65aac141d *R/Deltakl.r -c26e89bf42e9d40f70570d3fa40a27c2 *R/Domains.r -40b73b94a4acf46b5722c835180306ee *R/E.1SI.R -116bb5940489342e04f10c1391d3a1ea *R/E.2SI.r -3993547547c42677d574b67e068ff1c6 *R/E.BE.r -74f73575e0674b0a511cec8fd5cebac6 *R/E.Beta.r -c5878e4f40b41fe5bdc049f2107655e8 *R/E.PO.r -fa086957dba0dc9461dd9afe6e29f472 *R/E.PPS.r -07d9f98380516cc37354931084ef0bbb *R/E.Quantile.r -b74b6fe31eef3a24f02da19c0709acc4 *R/E.SI.r -324cbb9d93937453dcad24b8898595d1 *R/E.STPPS.r -dd065f462d96b6dd51f23c79aa50582a *R/E.STSI.r -765918d8ac225e08327ae7d4a46474c9 *R/E.STpiPS.R -1d33422cba213f7f84f7ad08d6ca4c25 *R/E.SY.r -99f05cd36aa35e1a9ad2ce5e76549ed9 *R/E.Trim.R -1d2cd45881fe7bbc1c1bdf32baf09cdf *R/E.UC.R -8a2064359292bed6b2f2fc7474924db8 *R/E.WR.r -71eab9e832f9f3c818c8322878210457 *R/E.piPS.r -81b8bebb55cafaa7ee71dfe97a17da86 *R/GREG.SI.r -d9f8cb725f40d590c46474f55510926a *R/HH.r -7b872d442bc06ce59f9eebba986a5ef9 *R/HT.r -c89ad9b876a6968a859f59d09e0bfb41 *R/IPFP.r -4e1b0efff52308be2e76063169f14088 *R/Ik.r -e9f8c7793012416bc96749d06f881fb6 *R/IkRS.r -d6fbf9f7755197d5e01503b2779afcd0 *R/IkWR.r -dfe0453ed1492f48ecaabe5b07041be8 *R/OrderWR.r -2d562377091b4531f905002e1640564c *R/Pik.r -2d74e04c4c4b57505dbba5c33329f843 *R/PikHol.r -69c6c30af63a9700e6eae2a1abba9dff *R/PikPPS.r -17a2b362f02172eb7d2989373653ac4c *R/PikSTPPS.R -05379e9c1a66539d5f5c87039eb02d40 *R/Pikl.r -ef087cb60669f4a4c5ebd3476e9fa965 *R/S.BE.r -d67fac70bc69c91016ba12e4bbdb1d8a *R/S.PO.r -622fe8c6fa82c177e29e4e2bc391e42c *R/S.PPS.r -88994b84bc62e87fe27c6c99bd1cc7f0 *R/S.SI.r -3abe556531d56ce0784cb405cb328f8b *R/S.STPPS.r -5fef5aa981fc5448f21172134a03bad1 *R/S.STSI.r -58bc25b3f07d5fa2ddf1f55f517b49b9 *R/S.STpiPS.R -f062ea7a9c012873582593787987935f *R/S.SY.r -460e9a56bc05f70c33c33d676d53535d *R/S.WR.r -7af51e2628c17740d178b8b049114ef3 *R/S.piPS.r -377734cf568ed76166ee12e3226a2ae5 *R/Support.r -2fabe9d36da6f7feb5c3d39766a56f0e *R/SupportRS.r -001fb29fdd6e52e5fa0d40a361766bf9 *R/SupportWR.r -d750f48140381578e0bed68a28d52291 *R/T.SIC.r -7792dde0176e7325f6a7b68ee5975cf4 *R/VarHT.r -b1d88100a4b0461efc5848e279fe3dc1 *R/VarSYGHT.R -1b6177a35af9e154dab1fd22bd0d17fd *R/Wk.r -191ae0699d5dcd03af1ec13531aadb9a *R/nk.r -de888f35e509b387912e6050f39ed02e *R/p.WR.r -314e416cba9e6e087c234168f66c8414 *data/BigCity.RData -026dccba4d83fd7af4ae0959ce0b3a08 *data/BigLucy.rda -f85dfd8953fdc676d9e72ddae77d7d7c *data/Lucy.rda -4eaa2fcb1e8e43987bbe08240d382333 *data/datalist -9033d529a72a540333603708c41a7206 *man/BigCity.Rd -b789dba9b29d1abbc70f18c3889194cc *man/BigLucy.rd -b04673eca685a0f8ffdead3c99fd6119 *man/Deltakl.rd -157b1ac0e2e6d06a6e84016846506776 *man/Domains.rd -674b3c42319ecce2bad8839759b976a2 *man/E.1SI.Rd -effee56d0e3b263666a1e9e376ae8dc8 *man/E.2SI.rd -8d3233f1eee3bbe26e0d7038c0c70695 *man/E.BE.rd -611584d4132e1e87c0980d57ff58f011 *man/E.Beta.rd -a33aded02f9a922e542056c1d1d4b326 *man/E.PO.rd -37a83c27a51980fb31674448fa38a2db *man/E.PPS.rd -e5acc6bb09a2be266fc2f2d88eb27a79 *man/E.Quantile.rd -a6cfa5afed72295f8b61ddb2df9c65ef *man/E.SI.rd -0c9e1c8ac9423945742a875c08204921 *man/E.STPPS.rd -69b32ef51bccd2fbb219d6fb3c3c4e1a *man/E.STSI.rd -91f669f181b4fa5f7e4d7cf195e57b82 *man/E.STpiPS.Rd -b7673dda03958bf93b0d36dfbab9dc7c *man/E.SY.rd -18d48f27943cfcd1e7d19ad4f800d70c *man/E.Trim.Rd -7917bf3e94e5ebcd1099b5ca9201f70e *man/E.UC.Rd -2b311f1513524342923cc79ad1c6ad02 *man/E.WR.rd -ad18141f5df3a7c89484b1eded8dbf40 *man/E.piPS.rd -ca9123319cb2441952045b3345047dde *man/GREG.SI.rd -1884a7c9ea1b6897b52dda5b6ecb30bd *man/HH.rd -a80ca4c3d3e9c2cb9795066c617a042f *man/HT.rd -49afa6f71a05896264f189dc762699a0 *man/IPFP.rd -3e86adedcafb799683c80c15fd38b391 *man/Ik.rd -d4dd45b93c85b62f98e9c9c453c126cf *man/IkRS.rd -cf4caeac77c2005242c8f0d92829cb6e *man/IkWR.rd -bc1f82f2e43f38b8179148e644d66c33 *man/Lucy.rd -3ecc10be567e4caf48150b3bfbaa5cee *man/OrderWR.rd -23db45af738633074d96716167955543 *man/Pik.rd -344b2e01eda67cb995e2b93c25a5e3d8 *man/PikHol.rd -2262907a39785be07991c435f6f4ca16 *man/PikPPS.rd -01bed42e6eba9feca3041c00109445a6 *man/PikSTPPS.Rd -d893a402195006d0a7f5dfdaaf7f4f62 *man/Pikl.rd -153c1db83965ddc84cd6c591f927260f *man/S.BE.rd -a80b1f069dc66d2d9a1ceeec2002eacf *man/S.PO.rd -b98bc055ab1e176f812c14378d9c138d *man/S.PPS.rd -059c41796b4c2240b76fed2f90236106 *man/S.SI.rd -91ed9705e66a07755084eada3061ade2 *man/S.STPPS.rd -462b79ded52c76bb448796dc73412908 *man/S.STSI.rd -575b90af4f9bf0220114d3f61df3e9e5 *man/S.STpiPS.Rd -01d2ce9f1adc47f3152aacfbd49b3960 *man/S.SY.rd -6165fa6e4aba2ee1372cbdd806d746b4 *man/S.WR.rd -93cb3c2c714c71f5cbbb41ff18a07311 *man/S.piPS.rd -c5aed92af685317377d5dd2d9840bd66 *man/Support.rd -9b50c250efae5c9b213fe8a80053d7b8 *man/SupportRS.rd -ff19964b2f0fe87f38a1c5151f19b8eb *man/SupportWR.rd -0e90c70f5b928423c54409727ad0f8fa *man/T.SIC.rd -c4b964cbf99eb877ae9b76cbdef964c3 *man/VarHT.rd -cd2eece3bbf341c9086429ead6f569b3 *man/VarSYGHT.Rd -b34f77a1d5b9bd3defd66a1a7321f49f *man/Wk.rd -2cbe6fb4bad1dbb5db9c1dcf8bb1544f *man/nk.rd -8c0d3ae9dbdfffafa814e14db4f2ef7e *man/p.WR.rd diff --git a/NAMESPACE b/NAMESPACE index adb0255..dd71e7d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(T.SIC) export(VarHT) export(VarSYGHT) export(Wk) +export(kish_allocation) export(nk) export(p.WR) import(stats) diff --git a/R/Deltakl.r b/R/Deltakl.r index a98cbc9..9dff471 100644 --- a/R/Deltakl.r +++ b/R/Deltakl.r @@ -1,8 +1,47 @@ #' @export +#' +#' @title +#' Matrix of Joint Inclusion Probability Differences +#' @description +#' Computes the matrix \eqn{\Delta_{kl} = \pi_{kl} - \pi_k \pi_l} for all +#' pairs of units in a finite population. This matrix appears in the exact +#' Horvitz-Thompson variance formula. +#' @return +#' An \code{N x N} matrix where entry \eqn{(k, l)} equals +#' \eqn{\pi_{kl} - \pi_k \pi_l}. Diagonal entries equal +#' \eqn{\pi_k(1 - \pi_k)}. +#' @details +#' The matrix \eqn{\Delta} is central to the Horvitz-Thompson variance +#' estimator: +#' \deqn{V(\hat{t}_{y,\pi}) = \sum_k \sum_l \Delta_{kl} \frac{y_k}{\pi_k} +#' \frac{y_l}{\pi_l}} +#' It requires computing both first-order (\code{\link{Pik}}) and +#' second-order (\code{\link{Pikl}}) inclusion probabilities, so it is only +#' feasible for small populations. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Recommended \code{N <= 15}. +#' @param n Sample size. +#' @param p Vector of probabilities for each possible sample in the support. +#' Must sum to 1. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Pik}}, \code{\link{Pikl}}, \code{\link{VarHT}} +#' +#' @examples +#' N <- 5 +#' n <- 2 +#' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +#' Delta <- Deltakl(N, n, p) +#' Delta -Deltakl <- function(N, n, p){ -Ind <- Ik(N,n) -P1 <- as.matrix(Pik(p, Ind)) -Delta <-Pikl(N,n,p)-(t(P1)%*%P1) -return(Delta) -} +Deltakl <- function(N, n, p) { + Ind <- Ik(N, n) + P1 <- as.matrix(Pik(p, Ind)) + Delta <- Pikl(N, n, p) - (t(P1) %*% P1) + return(Delta) +} \ No newline at end of file diff --git a/R/Domains.r b/R/Domains.r index 7d1686d..6feb92d 100644 --- a/R/Domains.r +++ b/R/Domains.r @@ -1,12 +1,52 @@ #' @export +#' +#' @title +#' Domain Indicator Matrix +#' @description +#' Creates a binary indicator matrix that identifies the domain membership +#' of each unit in the sample. Each column corresponds to one domain +#' (level of \code{y}) and each row to one unit. +#' @return +#' A binary matrix of dimension \code{n x D}, where \code{D} is the number +#' of domains (levels of \code{y}). Entry \eqn{(k, d) = 1} if unit \eqn{k} +#' belongs to domain \eqn{d}, and 0 otherwise. Column names are the domain +#' labels. +#' @details +#' This function is useful for domain estimation, where population totals or +#' means must be estimated for subgroups of the population. The indicator +#' matrix can be multiplied element-wise with the variable of interest to +#' restrict estimation to each domain. +#' @author Hugo Andres Gutierrez Rojas +#' @param y A vector (factor or coercible to factor) identifying the domain +#' membership of each unit in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.SI}}, \code{\link{E.STSI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' # Level has 3 domains: Small, Medium, Big +#' dom <- Domains(Level[sam]) +#' head(dom) +#' colSums(dom) # sample sizes per domain -Domains<-function(y){ -y<-as.factor(y) -d<-as.double(y) -n<-length(d) -Dom<-matrix(0,n,max(d)) -colnames(Dom)<-levels(y) -for(k in 1: max(d)){ -Dom[,k]<-as.double(d==k)} -Dom +Domains <- function(y) { + y <- as.factor(y) + d <- as.double(y) + n <- length(d) + Dom <- matrix(0, n, max(d)) + colnames(Dom) <- levels(y) + for (k in 1:max(d)) { + Dom[, k] <- as.double(d == k) + } + Dom } \ No newline at end of file diff --git a/R/E.2SI.r b/R/E.2SI.r index 141fe7c..69dea93 100644 --- a/R/E.2SI.r +++ b/R/E.2SI.r @@ -1,32 +1,96 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Two Stage Simple Random Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' two-stage simple random sampling without replacement design, where both +#' Primary Sampling Units (PSUs) and Secondary Sampling Units (SSUs) are +#' selected by simple random sampling without replacement. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @details +#' The variance estimator decomposes into two components: the between-PSU +#' component and the within-PSU component, following the classical two-stage +#' variance decomposition of Sarndal et al. (1992). +#' @author Hugo Andres Gutierrez Rojas +#' @param NI Population size of Primary Sampling Units (PSUs). +#' @param nI Sample size of Primary Sampling Units (PSUs). +#' @param Ni Vector of population sizes of Secondary Sampling Units within +#' each selected PSU. +#' @param ni Vector of sample sizes of Secondary Sampling Units within +#' each selected PSU. +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' @param PSU Vector identifying the PSU membership of each unit in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.1SI}}, \code{\link{E.UC}} +#' +#' @examples +#' library(TeachingSampling) +#' data('BigCity') +#' library(dplyr) +#' Households <- BigCity %>% +#' group_by(HHID) %>% +#' summarise(PSU = unique(PSU), +#' Persons = n(), +#' Income = sum(Income), +#' Expenditure = sum(Expenditure)) +#' +#' UI <- levels(as.factor(Households$PSU)) +#' NI <- length(UI) +#' nI <- 10 +#' samI <- S.SI(NI, nI) +#' sampleI <- UI[samI] +#' CityI <- Households[Households$PSU %in% sampleI, ] +#' +#' Ni <- as.numeric(table(CityI$PSU)) +#' ni <- ceiling(Ni * 0.2) +#' +#' estima <- data.frame(CityI$Persons, CityI$Income, CityI$Expenditure) +#' area <- as.factor(CityI$PSU) +#' +#' E.2SI(NI, nI, Ni, ni, estima, area) -E.2SI<-function(NI,nI,Ni,ni,y,PSU){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.2SI <- function(NI, nI, Ni, ni, y, PSU) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - PSU<-as.factor(PSU) + PSU <- as.factor(PSU) - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) - f<-ni/Ni - F<-nI/NI + f <- ni/Ni + F <- nI/NI - for(k in 1:dim(y)[2]){ - ysum<- tapply(y[,k],PSU,sum) - s2i <- tapply(y[,k],PSU,var) - ti <- (1/f)*ysum - ty <- (1/F)*sum(ti) - part.1 <- NI^2/nI*(1-F)*var(ti) - part.2 <- NI/nI*sum(Ni^2/ni*(1-f)*s2i) - Vty <- part.1+part.2 - CVe<-100*sqrt(Vty)/ty - n<-length(y[,k]) - N<-(NI/nI)*sum(Ni) - VMAS<-(N^2)*(1-(n/N))*var(y[,k])/(n) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + for (k in 1:dim(y)[2]) { + ysum <- tapply(y[, k], PSU, sum) + s2i <- tapply(y[, k], PSU, var) + ti <- (1/f) * ysum + ty <- (1/F) * sum(ti) + part.1 <- NI^2/nI * (1 - F) * var(ti) + part.2 <- NI/nI * sum(Ni^2/ni * (1 - f) * s2i) + Vty <- part.1 + part.2 + CVe <- 100 * sqrt(Vty)/ty + n <- length(y[, k]) + N <- (NI/nI) * sum(Ni) + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) -} +} \ No newline at end of file diff --git a/R/E.BE.r b/R/E.BE.r index 7dc6cbf..171e134 100644 --- a/R/E.BE.r +++ b/R/E.BE.r @@ -1,22 +1,64 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Bernoulli Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' Bernoulli sampling design, where each unit in the population is independently +#' selected with the same inclusion probability. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @details +#' Under Bernoulli sampling, the sample size is random. The inclusion +#' probability is constant and equal to \code{prob} for all units. The +#' variance estimator accounts for the randomness of the sample size. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' @param prob Scalar. The (constant) inclusion probability used in the +#' Bernoulli sampling design. Must satisfy \code{0 < prob <= 1}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.BE}}, \code{\link{E.SI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' prob <- 0.1 +#' sam <- S.BE(N, prob) +#' sam <- sam[sam != 0] +#' y <- data.frame(Income = Income[sam], Employees = Employees[sam]) +#' E.BE(y, prob) -E.BE<-function(y,prob){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.BE <- function(y, prob) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) - for(k in 1:dim(y)[2]){ - ty<-sum(y[,k])/prob - Vty<-(1/prob)*((1/prob)-1)*sum(y[,k]^2) - CVe<-100*sqrt(Vty)/ty - n<-length(y[,k]) - N<-n/prob - VMAS<-(N^2)*(1-(n/N))*var(y[,k])/(n) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + for (k in 1:dim(y)[2]) { + ty <- sum(y[, k])/prob + Vty <- (1/prob) * ((1/prob) - 1) * sum(y[, k]^2) + CVe <- 100 * sqrt(Vty)/ty + n <- length(y[, k]) + N <- n/prob + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) } \ No newline at end of file diff --git a/R/E.Beta.r b/R/E.Beta.r index e717f45..77ce598 100644 --- a/R/E.Beta.r +++ b/R/E.Beta.r @@ -1,49 +1,93 @@ #' @export +#' +#' @title +#' Estimation of Regression Coefficients under Simple Random Sampling +#' @description +#' Computes the weighted least squares estimator of regression coefficients +#' for a finite population under simple random sampling without replacement. +#' Both the estimated coefficients and their estimated standard errors are +#' returned. +#' @return +#' A three-dimensional array with dimensions \code{[3, P, Q]}, where +#' \code{P} is the number of auxiliary variables and \code{Q} is the number +#' of variables of interest. The three rows correspond to: +#' \itemize{ +#' \item \code{Beta estimation}: Estimated regression coefficient. +#' \item \code{Standard Error}: Estimated standard error. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' } +#' @details +#' The estimator uses a working model with weights \eqn{V = 1/(\pi_k c_k)}, +#' where \eqn{\pi_k = n/N} under simple random sampling and \eqn{c_k} is an +#' optional variance-stabilising constant. The variance is estimated using +#' the residual-based sandwich approach of Sarndal et al. (1992). +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param n Sample size. +#' @param y Vector, matrix or data frame of variables of interest (response). +#' @param x Vector, matrix or data frame of auxiliary variables (predictors). +#' @param ck Optional variance-stabilising constant. Default is \code{1} +#' (homoscedastic model). +#' @param b0 Logical. If \code{TRUE}, an intercept column of ones is +#' prepended to \code{x}. Default is \code{FALSE}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{GREG.SI}}, \code{\link{E.SI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' y <- data.frame(Income = Income[sam]) +#' x <- data.frame(Employees = Employees[sam]) +#' E.Beta(N, n, y, x, b0 = TRUE) -E.Beta<-function(N, n, y, x, ck=1, b0=FALSE){ +E.Beta <- function(N, n, y, x, ck = 1, b0 = FALSE) { if (b0 == TRUE) { - x<-as.data.frame(cbind(1,x)) + x <- as.data.frame(cbind(1, x)) } - #--------------- Q <- dim(as.matrix(y))[2] P <- dim(as.matrix(x))[2] - #--------------- - Total<-array(NA,c(3,P,Q)) - rownames(Total)=c("Beta estimation", "Standard Error","CVE") - colnames(Total)<-names(x) - dimnames(Total)[[3]]<-names(y) - #--------------- - Pik <- rep(n/N,n) - for(q in 1:Q){ - yq<-as.matrix(y[,q]) - x<-as.matrix(x) - ck<-as.numeric(unlist(ck)) - V<-1/(Pik*ck) - bq<-solve(t(V*x)%*%x)%*%(t(V*x)%*%yq) - ek <- yq - x%*%bq - uk <- c(ek)*x - Varuk <- (N^2/n)*(1-(n/N))*var(uk) - P1 <- solve(t(V*x)%*%x) - Vbeta <- as.matrix(P1)%*%as.matrix(Varuk)%*%as.matrix(P1) + Total <- array(NA, c(3, P, Q)) + rownames(Total) <- c("Beta estimation", "Standard Error", "CVE") + colnames(Total) <- names(x) + dimnames(Total)[[3]] <- names(y) + Pik <- rep(n/N, n) + for (q in 1:Q) { + yq <- as.matrix(y[, q]) + x <- as.matrix(x) + ck <- as.numeric(unlist(ck)) + V <- 1/(Pik * ck) + bq <- solve(t(V * x) %*% x) %*% (t(V * x) %*% yq) + ek <- yq - x %*% bq + uk <- c(ek) * x + Varuk <- (N^2/n) * (1 - (n/N)) * var(uk) + P1 <- solve(t(V * x) %*% x) + Vbeta <- as.matrix(P1) %*% as.matrix(Varuk) %*% as.matrix(P1) Vbeta <- diag(Vbeta) - CVe <-100*sqrt(Vbeta)/bq - #--------------- - if(Q == 1){ - Total[1,,]<-bq - Total[2,,]<-sqrt(Vbeta) - Total[3,,]<-CVe + CVe <- 100 * sqrt(Vbeta)/bq + if (Q == 1) { + Total[1, , ] <- bq + Total[2, , ] <- sqrt(Vbeta) + Total[3, , ] <- CVe } - if(P == 1 & Q > 1){ - Total[1,,][q]<-bq - Total[2,,][q]<-sqrt(Vbeta) - Total[3,,][q]<-CVe + if (P == 1 & Q > 1) { + Total[1, , ][q] <- bq + Total[2, , ][q] <- sqrt(Vbeta) + Total[3, , ][q] <- CVe } - if(Q > 1 & P > 1){ - Total[1,,][,q]<-bq - Total[2,,][,q]<-sqrt(Vbeta) - Total[3,,][,q]<-CVe + if (Q > 1 & P > 1) { + Total[1, , ][, q] <- bq + Total[2, , ][, q] <- sqrt(Vbeta) + Total[3, , ][, q] <- CVe } - #--------------- } return(Total) } \ No newline at end of file diff --git a/R/E.PO.r b/R/E.PO.r index 8352335..b684a10 100644 --- a/R/E.PO.r +++ b/R/E.PO.r @@ -1,21 +1,64 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Poisson Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' Poisson sampling design, where each unit is independently selected with +#' its own inclusion probability. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @details +#' Under Poisson sampling, units are selected independently, so the exact +#' variance of the Horvitz-Thompson estimator has a simple closed form: +#' \eqn{V(\hat{t}) = \sum_k (1 - \pi_k)(y_k/\pi_k)^2}. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' @param Pik Vector of first-order inclusion probabilities for each unit +#' in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.PO}}, \code{\link{E.piPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' Pik <- PikPPS(n, Employees) +#' sam <- S.PO(N, Pik) +#' sam <- sam[sam != 0] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.PO(y, Pik[sam]) -E.PO<-function(y,Pik){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.PO <- function(y, Pik) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) - n<-length(Pik) - for(k in 1:dim(y)[2]){ - ty<-sum(y[,k]/Pik) - Vty<-sum((1-Pik)*((y[,k]/Pik)^2)) - CVe<-100*sqrt(Vty)/ty - N<-sum(1/Pik) - VMAS<-(N^2)*(1-(n/N))*var(y[,k])/(n) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + n <- length(Pik) + for (k in 1:dim(y)[2]) { + ty <- sum(y[, k]/Pik) + Vty <- sum((1 - Pik) * ((y[, k]/Pik)^2)) + CVe <- 100 * sqrt(Vty)/ty + N <- sum(1/Pik) + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) } \ No newline at end of file diff --git a/R/E.PPS.r b/R/E.PPS.r index 5403315..43d1d6e 100644 --- a/R/E.PPS.r +++ b/R/E.PPS.r @@ -1,21 +1,61 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under PPS With-Replacement Sampling +#' @description +#' Computes the Hansen-Hurwitz estimator of the population total under a +#' probability proportional to size with-replacement (PPS-WR) sampling design. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @details +#' The Hansen-Hurwitz estimator is \eqn{\hat{t} = (1/m)\sum_{i=1}^m y_i/p_i}, +#' where \eqn{p_i} is the selection probability of the \eqn{i}-th draw and +#' \eqn{m} is the number of draws. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every selected unit (with possible repetitions). +#' @param pk Vector of selection probabilities for each draw in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.PPS}}, \code{\link{HH}}, \code{\link{E.piPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' m <- 400 +#' res <- S.PPS(m, Employees) +#' sam <- res[, 1] +#' pk <- res[, 2] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.PPS(y, pk) -E.PPS<-function(y,pk){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.PPS <- function(y, pk) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) - m<-length(pk) - for(k in 1:dim(y)[2]){ - ty<-sum(y[,k]/pk)/m - Vty<-(1/m)*(1/(m-1))*sum((y[,k]/pk-ty)^2) - CVe<-100*sqrt(Vty)/ty - N<-(1/m)*sum(1/pk) - VMAS<-(N^2)*(1-(m/N))*var(y[,k])/(m) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + m <- length(pk) + for (k in 1:dim(y)[2]) { + ty <- sum(y[, k]/pk)/m + Vty <- (1/m) * (1/(m - 1)) * sum((y[, k]/pk - ty)^2) + CVe <- 100 * sqrt(Vty)/ty + N <- (1/m) * sum(1/pk) + VMAS <- (N^2) * (1 - (m/N)) * var(y[, k])/(m) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) } \ No newline at end of file diff --git a/R/E.Quantile.r b/R/E.Quantile.r index 4b482ad..cc234d6 100644 --- a/R/E.Quantile.r +++ b/R/E.Quantile.r @@ -1,41 +1,78 @@ #' @export +#' +#' @title +#' Estimation of Population Quantiles +#' @description +#' Computes a weighted quantile estimator for finite populations. When +#' inclusion probabilities are provided, the estimator uses the +#' Horvitz-Thompson weights \eqn{d_k = 1/\pi_k}; otherwise, equal weights +#' are assumed (simple random sampling). +#' @return +#' A numeric vector of length equal to the number of variables in \code{y}, +#' containing the estimated quantile for each variable. +#' @details +#' The estimator is based on the weighted empirical cumulative distribution +#' function. For each variable, units are sorted by their observed value, +#' cumulative weights are computed, and the quantile is located by +#' interpolation. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' @param Qn Scalar in \eqn{(0, 1)}. The desired quantile level +#' (e.g. \code{0.5} for the median, \code{0.25} for the first quartile). +#' @param Pik Optional vector of first-order inclusion probabilities. If +#' omitted, equal probabilities are assumed. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.SI}}, \code{\link{E.piPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' Pik <- rep(n/N, n) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' +#' # Median +#' E.Quantile(y, Qn = 0.5, Pik = Pik) +#' # First quartile +#' E.Quantile(y, Qn = 0.25, Pik = Pik) E.Quantile <- function(y, Qn, Pik) { -y<-as.data.frame(y) -Total<-rep(NA,dim(y)[2]) - + y <- as.data.frame(y) + Total <- rep(NA, dim(y)[2]) if (missing(Pik)) - Pik <- rep(1, dim(y)[1]) + Pik <- rep(1, dim(y)[1]) if (any(Pik < 0)) - stop("Probabilities must be positive.") - -w <- 1/Pik -n <- length(w) - -for(i in 1:dim(y)[2]){ - -ord <- order(y[,i]) -x <- y[ord,i] -w <- w[ord] -wcum <- cumsum(w) -wsum <- wcum[n] -wper <- wsum*Qn -lows <- (wcum <= wper) -k <- sum(lows) - if (k!=0 && k!=n){ - wlow <- wcum[k] + stop("Probabilities must be positive.") + w <- 1/Pik + n <- length(w) + for (i in 1:dim(y)[2]) { + ord <- order(y[, i]) + x <- y[ord, i] + w <- (1/Pik)[ord] + wcum <- cumsum(w) + wsum <- wcum[n] + wper <- wsum * Qn + lows <- (wcum <= wper) + k <- sum(lows) + if (k != 0 && k != n) { + wlow <- wcum[k] whigh <- wsum - wlow - if (whigh > wper) - Total[i]<-x[k+1] - else - Total[i]<-(wlow*x[k] + whigh*x[k+1]) / wsum - } - if (k == 0) { - Total[i] <- x[1] - } - if (k == n) { - Total[i] <- x[n] + if (whigh > wper) + Total[i] <- x[k + 1] + else + Total[i] <- (wlow * x[k] + whigh * x[k + 1])/wsum + } + if (k == 0) Total[i] <- x[1] + if (k == n) Total[i] <- x[n] } -} -return(Total) -} + return(Total) +} \ No newline at end of file diff --git a/R/E.SI.r b/R/E.SI.r index 064f427..54d0be1 100644 --- a/R/E.SI.r +++ b/R/E.SI.r @@ -1,21 +1,63 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Simple Random Sampling Without +#' Replacement +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' simple random sampling without replacement (SI) design. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect (always 1 under SI, included for +#' consistency with other estimators). +#' } +#' @details +#' Under simple random sampling without replacement, the Horvitz-Thompson +#' estimator reduces to \eqn{\hat{t}_y = N \bar{y}_s}, the expansion +#' estimator. The design effect is always 1 because SI is the reference design. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param n Sample size. +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.SI}}, \code{\link{E.STSI}}, \code{\link{GREG.SI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.SI(N, n, y) -E.SI<-function(N,n,y){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.SI <- function(N, n, y) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) - pik<-matrix(n/N, nrow=n, ncol=1) - dk<-1/pik - for(k in 1:dim(y)[2]){ - ty<-sum(y[,k]*dk) - Vty<-(N^2)*(1-(n/N))*var(y[,k])/(n) - CVe<-100*sqrt(Vty)/ty - VMAS<-(N^2)*(1-(n/N))*var(y[,k])/(n) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + pik <- matrix(n/N, nrow = n, ncol = 1) + dk <- 1/pik + for (k in 1:dim(y)[2]) { + ty <- sum(y[, k] * dk) + Vty <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + CVe <- 100 * sqrt(Vty)/ty + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) } \ No newline at end of file diff --git a/R/E.SY.r b/R/E.SY.r index 6d14a7c..f6f8fa6 100644 --- a/R/E.SY.r +++ b/R/E.SY.r @@ -1,21 +1,62 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Systematic Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' systematic sampling design with sampling interval \code{a}. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @details +#' Under systematic sampling the sample size is \eqn{n = N/a}. Because only +#' one systematic sample is observed, the variance cannot be estimated without +#' assumptions. Here, the variance is approximated by treating the systematic +#' sample as a simple random sample of the same size, which is a common +#' conservative approximation. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param a Sampling interval (skip). The expected sample size is \code{N/a}. +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.SY}}, \code{\link{E.SI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' a <- 10 +#' sam <- S.SY(N, a) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.SY(N, a, y) -E.SY<-function(N,a,y){ - n<-N/a - y<-cbind(1,y) - y<-as.data.frame(y) +E.SY <- function(N, a, y) { + n <- N/a + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) - - for(k in 1:dim(y)[2]){ - ty<-a*sum(y[,k]) - Vty<-(N^2)*(1-(n/N))*var(y[,k])/(n) - CVe<-100*sqrt(Vty)/ty - VMAS<-(N^2)*(1-(n/N))*var(y[,k])/(n) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + for (k in 1:dim(y)[2]) { + ty <- a * sum(y[, k]) + Vty <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + CVe <- 100 * sqrt(Vty)/ty + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) } \ No newline at end of file diff --git a/R/E.WR.r b/R/E.WR.r index 9bde3c0..a564066 100644 --- a/R/E.WR.r +++ b/R/E.WR.r @@ -1,21 +1,60 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Simple Random Sampling With +#' Replacement +#' @description +#' Computes the Hansen-Hurwitz estimator of the population total under a +#' simple random sampling with replacement (WR) design. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling +#' without replacement. +#' } +#' @details +#' Under simple random sampling with replacement with \code{m} draws, the +#' Hansen-Hurwitz estimator is \eqn{\hat{t} = (N/m)\sum_{i=1}^m y_i}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param m Number of draws (sample size with replacement). +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every draw in the sample (repetitions allowed). +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.WR}}, \code{\link{E.SI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' m <- 400 +#' sam <- S.WR(N, m) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.WR(N, m, y) -E.WR<-function(N,m,y){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.WR <- function(N, m, y) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) - - for(k in 1:dim(y)[2]){ - - ty<-(N/m)*sum(y[,k]) - Vty<-(N^2/m)*var(y[,k]) - CVe<-100*sqrt(Vty)/ty - VMAS<-(N^2)*(1-(m/N))*var(y[,k])/(m) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + for (k in 1:dim(y)[2]) { + ty <- (N/m) * sum(y[, k]) + Vty <- (N^2/m) * var(y[, k]) + CVe <- 100 * sqrt(Vty)/ty + VMAS <- (N^2) * (1 - (m/N)) * var(y[, k])/(m) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) -} +} \ No newline at end of file diff --git a/R/E.piPS.r b/R/E.piPS.r index 4ccf1fe..cd2391e 100644 --- a/R/E.piPS.r +++ b/R/E.piPS.r @@ -1,32 +1,75 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Pi Probability Proportional to +#' Size Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' without-replacement probability proportional to size (piPS) sampling design. +#' The variance is estimated using the Horvitz-Thompson variance approximation +#' based on first-order inclusion probabilities. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error of the total. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @details +#' When all inclusion probabilities are equal (i.e. \code{sum(Pik) == n}), +#' the variance is set to zero, reflecting an equal-probability design. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the selected sample. +#' @param Pik Vector of first-order inclusion probabilities for each +#' unit in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.piPS}}, \code{\link{PikPPS}}, \code{\link{E.PO}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' x <- Employees +#' res <- S.piPS(n, x) +#' sam <- res[, 1] +#' Pik <- res[, 2] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.piPS(y, Pik) -E.piPS <-function(y,Pik){ - y<-cbind(1,y) - y<-as.data.frame(y) +E.piPS <- function(y, Pik) { + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - Total<-matrix(NA,nrow=4,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Total)<-names(y) - n<-length(Pik) - for(k in 1:dim(y)[2]){ - ty<-sum(y[,k]/Pik) - #------------------- - ck <- (1-Pik)*(n/(n-1)) - P1 <- sum(ck*y[,k]/Pik) - P2 <- sum(ck) - ystar <- Pik*P1/P2 - P3 <- ck/(Pik^2) - #-------------------- - if(sum(Pik) == n){ + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + n <- length(Pik) + for (k in 1:dim(y)[2]) { + ty <- sum(y[, k]/Pik) + ck <- (1 - Pik) * (n/(n - 1)) + P1 <- sum(ck * y[, k]/Pik) + P2 <- sum(ck) + ystar <- Pik * P1/P2 + P3 <- ck/(Pik^2) + if (sum(Pik) == n) { Vty <- 0 } else { - Vty <- sum(P3*((y[,k]-ystar)^2)) + Vty <- sum(P3 * ((y[, k] - ystar)^2)) } - CVe<-100*sqrt(Vty)/ty - N<-sum(1/Pik) - VMAS<-(N^2)*(1-(n/N))*var(y[,k])/(n) - DEFF<-Vty/VMAS - Total[,k]<-c(ty,sqrt(Vty),CVe,DEFF) + CVe <- 100 * sqrt(Vty)/ty + N <- sum(1/Pik) + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) -} +} \ No newline at end of file diff --git a/R/GREG.SI.r b/R/GREG.SI.r index 45ef5c4..9db7e37 100644 --- a/R/GREG.SI.r +++ b/R/GREG.SI.r @@ -1,26 +1,75 @@ #' @export +#' +#' @title +#' Generalised Regression Estimator under Simple Random Sampling +#' @description +#' Computes the Generalised Regression (GREG) estimator of the population +#' total under simple random sampling without replacement, using known +#' population totals of auxiliary variables to improve efficiency. +#' @return +#' A matrix with three rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: GREG estimated population total. +#' \item \code{Standard Error}: Estimated standard error. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' } +#' @details +#' The GREG estimator is: +#' \deqn{\hat{t}_{GREG} = \hat{t}_{HT} + (\mathbf{t}_x - +#' \hat{\mathbf{t}}_{x,HT})^T \hat{\boldsymbol{\beta}}} +#' where \eqn{\hat{\boldsymbol{\beta}}} are the regression coefficients +#' estimated from the sample, \eqn{\mathbf{t}_x} are the known population +#' totals, and variance is estimated from the residuals. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param n Sample size. +#' @param y Vector, matrix or data frame of variables of interest. +#' @param x Vector, matrix or data frame of auxiliary variables observed +#' in the sample. +#' @param tx Vector of known population totals for the auxiliary variables. +#' @param b Matrix of regression coefficients (e.g. from \code{\link{E.Beta}}). +#' @param b0 Logical. If \code{TRUE}, an intercept column is prepended to +#' \code{x}. Default is \code{FALSE}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.Beta}}, \code{\link{E.SI}}, \code{\link{Wk}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' y <- data.frame(Income = Income[sam]) +#' x <- data.frame(Employees = Employees[sam]) +#' tx <- sum(Employees) +#' b <- E.Beta(N, n, y, x, b0 = FALSE) +#' GREG.SI(N, n, y, x, tx, b) -GREG.SI<-function(N,n,y,x,tx,b,b0=FALSE){ - y<-as.data.frame(y) - x<-as.matrix(x) - pik<-rep(n/N,n) - dk<-1/pik - if (b0 == TRUE){ - x<-as.matrix(cbind(1,x))} - - Total<-matrix(NA,nrow=3,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE") - colnames(Total)<-names(y) - - for(k in 1:dim(y)[2]){ - - xHT <- t(x)%*%dk - yHT <- sum(y[,k]*dk) - ty <- yHT + (tx-t(xHT))%*%as.matrix(b[,k]) - e <- y[,k]-(x%*%as.matrix(b[,k])) - Vty <- (N^2)*(1-(n/N))*var(e)/(n) - CVe <- 100*sqrt(Vty)/ty - Total[,k] <- c(ty,sqrt(Vty),CVe) +GREG.SI <- function(N, n, y, x, tx, b, b0 = FALSE) { + y <- as.data.frame(y) + x <- as.matrix(x) + pik <- rep(n/N, n) + dk <- 1/pik + if (b0 == TRUE) { + x <- as.matrix(cbind(1, x)) + } + Total <- matrix(NA, nrow = 3, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE") + colnames(Total) <- names(y) + for (k in 1:dim(y)[2]) { + xHT <- t(x) %*% dk + yHT <- sum(y[, k] * dk) + ty <- yHT + (tx - t(xHT)) %*% as.matrix(b[, k]) + e <- y[, k] - (x %*% as.matrix(b[, k])) + Vty <- (N^2) * (1 - (n/N)) * var(e)/(n) + CVe <- 100 * sqrt(Vty)/ty + Total[, k] <- c(ty, sqrt(Vty), CVe) } return(Total) } \ No newline at end of file diff --git a/R/HT.r b/R/HT.r index 22cda7d..3d67c37 100644 --- a/R/HT.r +++ b/R/HT.r @@ -1,8 +1,49 @@ #' @export +#' +#' @title +#' Horvitz-Thompson Estimator of the Population Total +#' @description +#' Computes the Horvitz-Thompson (HT) estimator of the population total for +#' one or more variables of interest, given the sample observations and their +#' first-order inclusion probabilities. +#' @return +#' A numeric vector or matrix with the estimated total for each variable +#' of interest. +#' @details +#' The Horvitz-Thompson estimator is defined as: +#' \deqn{\hat{t}_{y,\pi} = \sum_{k \in s} \frac{y_k}{\pi_k}} +#' where \eqn{\pi_k} is the first-order inclusion probability of unit \eqn{k}. +#' This estimator is design-unbiased for any fixed-size sampling design. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector or matrix of values of the variable(s) of interest for +#' units in the sample. +#' @param Pik Vector of first-order inclusion probabilities for each unit +#' in the sample. +#' +#' @references +#' Horvitz, D.G. and Thompson, D.J. (1952). A generalization of sampling +#' without replacement from a finite universe. +#' \emph{Journal of the American Statistical Association}, 47, 663-685.\cr +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer. +#' +#' @seealso \code{\link{VarHT}}, \code{\link{E.SI}}, \code{\link{E.piPS}} +#' +#' @examples +#' # Population N = 5, sample size n = 2 +#' N <- 5 +#' n <- 2 +#' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +#' y <- c(32, 34, 46, 89, 35) +#' Ind <- Ik(N, n) +#' pik <- as.vector(Pik(p, Ind)) +#' # Select first sample (units 1 and 2) +#' sam <- c(1, 2) +#' HT(y[sam], pik[sam]) -HT<-function(y,Pik){ -y<-t(as.matrix(y)) -pik<-as.matrix(Pik) -HT<-y%*%(1/Pik) -HT +HT <- function(y, Pik) { + y <- t(as.matrix(y)) + pik <- as.matrix(Pik) + result <- y %*% (1/Pik) + result } \ No newline at end of file diff --git a/R/IPFP.r b/R/IPFP.r index 8259b3b..e07ae1c 100644 --- a/R/IPFP.r +++ b/R/IPFP.r @@ -1,34 +1,74 @@ #' @export +#' +#' @title +#' Iterative Proportional Fitting Procedure (Raking) +#' @description +#' Adjusts a contingency table so that its row and column marginals match +#' known population totals, using the Iterative Proportional Fitting +#' Procedure (IPFP), also known as raking or RAS algorithm. +#' @return +#' A matrix with \code{nrow(Table) + 1} rows and \code{ncol(Table) + 1} +#' columns containing the adjusted cell counts, with an added row of +#' estimated column totals and an added column of estimated row totals. +#' @details +#' The algorithm alternates between row and column adjustments until +#' convergence. At each step, cells in each row (or column) are multiplied +#' by the ratio of the known marginal to the current estimated marginal. +#' Convergence is assessed by the sum of absolute differences between +#' known and estimated marginals. +#' @author Hugo Andres Gutierrez Rojas +#' @param Table A matrix or data frame of initial cell counts or weights to +#' be adjusted. +#' @param Col.knw Numeric vector of known column marginal totals. +#' @param Row.knw Numeric vector of known row marginal totals. +#' @param tol Convergence tolerance. The algorithm stops when the total +#' absolute deviation between known and estimated marginals is below +#' \code{tol}. Default is \code{0.0001}. +#' +#' @references +#' Deming, W.E. and Stephan, F.F. (1940). On a least squares adjustment of +#' a sampled frequency table when the expected marginal totals are known. +#' \emph{Annals of Mathematical Statistics}, 11(4), 427-444.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Domains}}, \code{\link{Wk}} +#' +#' @examples +#' # A 2x2 table to be raked to known marginals +#' Table <- matrix(c(10, 20, 30, 40), nrow = 2) +#' Row.knw <- c(40, 60) +#' Col.knw <- c(35, 65) +#' IPFP(Table, Col.knw, Row.knw) -IPFP <- function(Table,Col.knw,Row.knw,tol=0.0001) -{ -Table <- as.matrix(Table) -Col.est <- colSums(Table) -Row.est <- rowSums(Table) -I <- length(Row.knw) -J <- length(Col.knw) -Est <- Table -criterio <- sum(abs(Col.knw-Col.est)) + sum(abs(Row.knw-Row.est)) -while(criterio > tol){ -for(i in 1:I){ -for(j in 1:J){ -Est[i,j] <- Est[i,j]*Row.knw[i]/Row.est[i] -} -} -Col.est <- colSums(Est) -Row.est <- rowSums(Est) -criterio <- sum(abs(Col.knw-Col.est)) + sum(abs(Row.knw-Row.est)) -for(i in 1:I){ -for(j in 1:J){ -Est[i,j] <- Est[i,j]*Col.knw[j]/Col.est[j] -} -} -Col.est <- colSums(Est) -Row.est <- rowSums(Est) -criterio <- sum(abs(Col.knw-Col.est)) + sum(abs(Row.knw-Row.est)) -} -p1 <- rbind(Est,Col.est) -p2 <- cbind(p1,c(Row.est,sum(Row.est))) -colnames(p2)[J+1] <- c("Row.est") -return(p2) +IPFP <- function(Table, Col.knw, Row.knw, tol = 0.0001) { + Table <- as.matrix(Table) + Col.est <- colSums(Table) + Row.est <- rowSums(Table) + I <- length(Row.knw) + J <- length(Col.knw) + Est <- Table + criterio <- sum(abs(Col.knw - Col.est)) + sum(abs(Row.knw - Row.est)) + while (criterio > tol) { + for (i in 1:I) { + for (j in 1:J) { + Est[i, j] <- Est[i, j] * Row.knw[i]/Row.est[i] + } + } + Col.est <- colSums(Est) + Row.est <- rowSums(Est) + criterio <- sum(abs(Col.knw - Col.est)) + sum(abs(Row.knw - Row.est)) + for (i in 1:I) { + for (j in 1:J) { + Est[i, j] <- Est[i, j] * Col.knw[j]/Col.est[j] + } + } + Col.est <- colSums(Est) + Row.est <- rowSums(Est) + criterio <- sum(abs(Col.knw - Col.est)) + sum(abs(Row.knw - Row.est)) + } + p1 <- rbind(Est, Col.est) + p2 <- cbind(p1, c(Row.est, sum(Row.est))) + colnames(p2)[J + 1] <- c("Row.est") + return(p2) } diff --git a/R/Ik.r b/R/Ik.r index 40ec4c5..116fd74 100644 --- a/R/Ik.r +++ b/R/Ik.r @@ -1,15 +1,50 @@ #' @export +#' +#' @title +#' Sample Membership Indicator Matrix +#' @description +#' Constructs the indicator matrix of the sampling support for a fixed-size +#' without-replacement design. Each row corresponds to one possible sample +#' and each column to one population unit. +#' @return +#' A binary matrix of dimension \code{choose(N, n) x N}, where entry +#' \eqn{(s, k) = 1} if unit \eqn{k} belongs to sample \eqn{s}, and 0 +#' otherwise. +#' @details +#' The full enumeration of all \code{choose(N, n)} possible samples is +#' computationally feasible only for small populations. For \code{N > 15} +#' this function will be very slow. It is intended primarily for theoretical +#' illustrations and teaching purposes. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Recommended \code{N <= 15}. +#' @param n Sample size. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Pik}}, \code{\link{Pikl}}, \code{\link{Support}} +#' +#' @examples +#' # All possible samples of size n = 2 from N = 4 units +#' N <- 4 +#' n <- 2 +#' Ik(N, n) +#' # Number of rows equals choose(N, n) = 6 +#' nrow(Ik(N, n)) == choose(N, n) -Ik <- function(N,n){ -Q <- Support(N,n,ID=FALSE) -I <- matrix(0,choose(N,n),N) -for(i in 1:n){ -for(j in 1:choose(N,n)){ -for(k in 1:N){ -if (Q[j,i]==k) -I[j,k] <- 1 -} -} -} -I +Ik <- function(N, n) { + Q <- Support(N, n, ID = FALSE) + I <- matrix(0, choose(N, n), N) + for (i in 1:n) { + for (j in 1:choose(N, n)) { + for (k in 1:N) { + if (Q[j, i] == k) + I[j, k] <- 1 + } + } + } + I } \ No newline at end of file diff --git a/R/IkRS.r b/R/IkRS.r index cd5113b..41b4a93 100644 --- a/R/IkRS.r +++ b/R/IkRS.r @@ -1,9 +1,41 @@ #' @export +#' +#' @title +#' Sample Membership Indicator Matrix for All Possible Sample Sizes +#' @description +#' Constructs the indicator matrix of the complete sampling support, stacking +#' the indicator matrices for all sample sizes from 1 to \code{N}. This +#' covers every possible non-empty subset of the population. +#' @return +#' A binary matrix with \eqn{2^N} rows (one per non-empty subset, including +#' the empty set as the first row of zeros) and \code{N} columns. Entry +#' \eqn{(s, k) = 1} if unit \eqn{k} belongs to subset \eqn{s}. +#' @details +#' This function calls \code{\link{Ik}} for each possible sample size +#' \eqn{n = 1, \ldots, N} and stacks the results. It is intended for small +#' populations only (\code{N <= 10}) due to the exponential growth of the +#' support size. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Recommended \code{N <= 10}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Ik}}, \code{\link{SupportRS}} +#' +#' @examples +#' # Full indicator matrix for N = 3 +#' IkRS(3) +#' # Number of rows: 1 (empty) + 3 + 3 + 1 = 8 = 2^3 +#' nrow(IkRS(3)) -IkRS <- function(N){ -sam <- matrix(0, ncol=N, nrow=1) -for(k in 1:N){ -sam<-rbind(sam, Ik(N,k)) -} -sam +IkRS <- function(N) { + sam <- matrix(0, ncol = N, nrow = 1) + for (k in 1:N) { + sam <- rbind(sam, Ik(N, k)) + } + sam } \ No newline at end of file diff --git a/R/IkWR.r b/R/IkWR.r index b6f9f3a..3eee64c 100644 --- a/R/IkWR.r +++ b/R/IkWR.r @@ -1,16 +1,51 @@ #' @export +#' +#' @title +#' Frequency Indicator Matrix for With-Replacement Sampling +#' @description +#' Constructs the indicator matrix of the with-replacement sampling support +#' for a population of size \code{N} and \code{m} draws. Each row corresponds +#' to one possible ordered outcome and each column to one population unit, +#' with entry \eqn{(s, k) = 1} if unit \eqn{k} was selected at least once +#' in outcome \eqn{s}. +#' @return +#' A binary matrix of dimension \code{choose(N+m-1, m) x N}, where entry +#' \eqn{(s, k) = 1} if unit \eqn{k} appears in the \eqn{s}-th outcome of +#' the with-replacement support, and 0 otherwise. +#' @details +#' The with-replacement support is enumerated via \code{\link{SupportWR}}. +#' This function is intended for small populations and few draws only, as the +#' support grows rapidly with \code{N} and \code{m}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Keep small due to combinatorial growth. +#' @param m Number of draws (sample size with replacement). +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Ik}}, \code{\link{SupportWR}}, \code{\link{nk}} +#' +#' @examples +#' # With-replacement support: N = 3 units, m = 2 draws +#' N <- 3 +#' m <- 2 +#' IkWR(N, m) +#' # Number of rows = choose(N + m - 1, m) = choose(4, 2) = 6 +#' nrow(IkWR(N, m)) == choose(N + m - 1, m) -IkWR <- function(N, m) -{ -Q <- SupportWR(N, m, ID = FALSE) -I <- matrix(0, choose(N+m-1, m), N) -for (i in 1:m) { -for (j in 1:choose(N+m-1, m)) { -for (k in 1:N) { -if (Q[j, i] == k) -I[j, k] <- 1 -} -} -} -I +IkWR <- function(N, m) { + Q <- SupportWR(N, m, ID = FALSE) + I <- matrix(0, choose(N + m - 1, m), N) + for (i in 1:m) { + for (j in 1:choose(N + m - 1, m)) { + for (k in 1:N) { + if (Q[j, i] == k) + I[j, k] <- 1 + } + } + } + I } \ No newline at end of file diff --git a/R/OrderWR.r b/R/OrderWR.r index 1e925d7..78728ec 100644 --- a/R/OrderWR.r +++ b/R/OrderWR.r @@ -1,46 +1,79 @@ #' @export +#' +#' @title +#' Ordered With-Replacement Sampling Support +#' @description +#' Enumerates all ordered sequences of \code{m} draws from a population of +#' size \code{N} with replacement. Unlike \code{\link{SupportWR}}, this +#' function considers order, so sequences that differ only in draw order are +#' treated as distinct outcomes. +#' @return +#' A matrix with \code{N^m} rows and \code{m} columns, where each row is one +#' ordered sequence of draws. If \code{ID} is provided, population labels are +#' substituted for indices. +#' @details +#' The total number of ordered with-replacement sequences of size \code{m} +#' from \code{N} units is \eqn{N^m}. This grows rapidly and the function +#' should only be used for small \code{N} and \code{m}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param m Number of draws. +#' @param ID Optional vector of population labels of length \code{N}. +#' If provided, labels are substituted for integer indices in the output. +#' If \code{FALSE} (default), integer indices are returned. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{SupportWR}}, \code{\link{IkWR}} +#' +#' @examples +#' # All ordered sequences of 2 draws from N = 3 units +#' OrderWR(N = 3, m = 2) +#' # N^m = 9 rows +#' +#' # With population labels +#' U <- c("A", "B", "C") +#' OrderWR(N = 3, m = 2, ID = U) -OrderWR<-function(N,m,ID=FALSE){ -b<-c(1:N) -grilla<-function(a){ -A<-seq(1:length(a)) -unoA <-rep(1,length(A)) -B<-seq(1:length(a)) -unoB <-rep(1,length(B)) -P1<-kronecker(A,unoB) -P2<-kronecker(unoA,B) -grid<-matrix(cbind(P1,P2),ncol=2) -return(grid) -} - -if(m==1){ -sam<-as.matrix(b) -} - -if(m==2){ -sam<-grilla(b) -} - -if(m>2){ -sam<-grilla(b) -for(l in 3:m){ -Sam1<-rep(0,l) -for(j in 1:dim(sam)[1]){ -for(k in 1:length(b)){ -Sam1<-rbind(Sam1,c(sam[j,],b[k])) -} } -sam<-Sam1[-1,] -} -} -if (is.logical(ID) == TRUE){return(sam)} -else{ -a<-dim(sam) -val<-matrix(NA,a[1],a[2]) -for(ii in 1:(dim(val)[1])){ -for(jj in 1:(dim(val)[2])){ -val[ii,jj]<-ID[sam[ii,jj]] -} -} -return(val) -} +OrderWR <- function(N, m, ID = FALSE) { + b <- c(1:N) + grilla <- function(a) { + A <- seq(1:length(a)) + unoA <- rep(1, length(A)) + B <- seq(1:length(a)) + unoB <- rep(1, length(B)) + P1 <- kronecker(A, unoB) + P2 <- kronecker(unoA, B) + grid <- matrix(cbind(P1, P2), ncol = 2) + return(grid) + } + if (m == 1) sam <- as.matrix(b) + if (m == 2) sam <- grilla(b) + if (m > 2) { + sam <- grilla(b) + for (l in 3:m) { + Sam1 <- rep(0, l) + for (j in 1:dim(sam)[1]) { + for (k in 1:length(b)) { + Sam1 <- rbind(Sam1, c(sam[j, ], b[k])) + } + } + sam <- Sam1[-1, ] + } + } + if (is.logical(ID) == TRUE) return(sam) + else { + a <- dim(sam) + val <- matrix(NA, a[1], a[2]) + for (ii in 1:(dim(val)[1])) { + for (jj in 1:(dim(val)[2])) { + val[ii, jj] <- ID[sam[ii, jj]] + } + } + return(val) + } } \ No newline at end of file diff --git a/R/Pik.r b/R/Pik.r index d3f4d39..4f281d9 100644 --- a/R/Pik.r +++ b/R/Pik.r @@ -1,7 +1,50 @@ #' @export +#' +#' @title +#' First-Order Inclusion Probabilities from a Sampling Design +#' @description +#' Computes the first-order inclusion probabilities for each unit in a finite +#' population, given the probability of each possible sample and the indicator +#' matrix of the sampling support. +#' @return +#' A row vector (1 x N matrix) of first-order inclusion probabilities +#' \eqn{\pi_k = P(k \in s)} for each unit \eqn{k} in the population. +#' @details +#' The inclusion probability of unit \eqn{k} is computed as the sum of the +#' probabilities of all samples that contain unit \eqn{k}: +#' \deqn{\pi_k = \sum_{s \ni k} p(s)} +#' The indicator matrix \code{Ind} (output of \code{\link{Ik}}) has one row +#' per possible sample and one column per population unit, with entry 1 if +#' unit \eqn{k} is in sample \eqn{s} and 0 otherwise. +#' @author Hugo Andres Gutierrez Rojas +#' @param p Vector of probabilities for each possible sample in the support. +#' Must sum to 1. +#' @param Ind Indicator matrix of the sampling support, as returned by +#' \code{\link{Ik}}. Rows are samples, columns are population units. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Ik}}, \code{\link{Pikl}}, \code{\link{PikPPS}} +#' +#' @examples +#' # Population of size N = 5, sample size n = 2 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) +#' n <- 2 +#' # Sample probabilities (one per possible sample) +#' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +#' Ind <- Ik(N, n) +#' pik <- Pik(p, Ind) +#' pik +#' # Check: inclusion probabilities sum to n +#' sum(pik) -Pik <- function(p, Ind){ -multip <- p*Ind -pik <- colSums(multip) -t(pik) +Pik <- function(p, Ind) { + multip <- p * Ind + pik <- colSums(multip) + t(pik) } \ No newline at end of file diff --git a/R/PikHol.r b/R/PikHol.r index 469bdab..0d61bbe 100644 --- a/R/PikHol.r +++ b/R/PikHol.r @@ -1,18 +1,64 @@ #' @export +#' +#' @title +#' Optimal Inclusion Probabilities for Multiple Surveys (Holmberg) +#' @description +#' Computes optimal first-order inclusion probabilities for a population that +#' is surveyed on multiple occasions, minimising a measure of total variance +#' across surveys. This implements the approach of Holmberg (2002) for +#' coordinated sampling over time. +#' @return +#' A numeric vector of length \code{N} with the optimal inclusion probability +#' for each unit in the population. +#' @details +#' For each survey \eqn{k}, the initial inclusion probabilities are computed +#' via \code{\link{PikPPS}}. An optimal composite size measure is then derived +#' by combining the per-survey auxiliary variables through a weighted sum, and +#' the final inclusion probabilities are computed proportional to the square +#' root of this composite. The resulting sample size \code{n.st} is chosen to +#' minimise total variance subject to a relative precision target \code{e}. +#' @author Hugo Andres Gutierrez Rojas +#' @param n Integer vector of length \code{p} with the desired sample size +#' for each of the \code{p} surveys. +#' @param sigma Matrix of dimension \code{N x p} where column \eqn{k} contains +#' the auxiliary size variable for survey \eqn{k}. +#' @param e Scalar. Relative tolerance parameter controlling the precision +#' target across surveys. +#' @param Pi Optional matrix of dimension \code{N x p} with initial inclusion +#' probabilities for each survey. If omitted, \code{\link{PikPPS}} is used. +#' +#' @references +#' Holmberg, A. (2002). A multiparameter perspective on the choice of sampling +#' design in surveys. \emph{Statistics in Transition}, 5(6), 969-994.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{PikPPS}}, \code{\link{PikSTPPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' # Two surveys with different auxiliary variables +#' sigma <- cbind(Employees, Income) +#' n <- c(100, 150) +#' pik <- PikHol(n, sigma, e = 0.1) +#' sum(pik <= 1) # all valid probabilities -PikHol <- function(n, sigma, e, Pi = PiDefault){ +PikHol <- function(n, sigma, e, Pi = NULL) { N <- dim(sigma)[1] p <- length(n) - PiDefault <- matrix(NA, nrow = N, ncol = p) - A <- matrix(NA, nrow = N, ncol = p) - for (k in 1:p) { - PiDefault[,k] <- PikPPS(n[k], sigma[,k]) + if (is.null(Pi)) { + Pi <- matrix(NA, nrow = N, ncol = p) + for (k in 1:p) { + Pi[, k] <- PikPPS(n[k], sigma[, k]) + } } + A <- matrix(NA, nrow = N, ncol = p) for (k in 1:p) { - A[,k] <- sigma[,k] ^ 2/(sum(((1 / Pi[,k]) - 1)*sigma[,k] ^ 2)) + A[, k] <- sigma[, k]^2/(sum(((1/Pi[, k]) - 1) * sigma[, k]^2)) } - aqk <- rowSums(A) - n.st <- ceiling(((sum(sqrt(aqk))) ^ 2)/((1 + e) * p + (sum(aqk)))) + aqk <- rowSums(A) + n.st <- ceiling(((sum(sqrt(aqk)))^2)/((1 + e) * p + (sum(aqk)))) pikopt <- PikPPS(n.st, sqrt(aqk)) return(pikopt) -} +} \ No newline at end of file diff --git a/R/PikPPS.r b/R/PikPPS.r index 0ebb695..cab08ea 100644 --- a/R/PikPPS.r +++ b/R/PikPPS.r @@ -1,15 +1,53 @@ #' @export +#' +#' @title +#' Inclusion Probabilities Proportional to Size +#' @description +#' Computes first-order inclusion probabilities proportional to an auxiliary +#' size variable \code{x} for a without-replacement sample of size \code{n}. +#' A sequential truncation algorithm ensures all probabilities are at most 1. +#' @return +#' A numeric vector of length \code{N} with the first-order inclusion +#' probability for each unit in the population. Values are in \code{(0, 1]}. +#' @details +#' The initial probabilities \eqn{\pi_k = n x_k / \sum x} may exceed 1 for +#' large units. The algorithm iteratively sets those probabilities to 1 and +#' redistributes the remaining sample size among the other units until all +#' probabilities are valid. The result satisfies \eqn{\sum \pi_k = n}. +#' @author Hugo Andres Gutierrez Rojas +#' @param n Desired sample size. +#' @param x Vector of length \code{N} with positive auxiliary size values +#' for each unit in the population. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.piPS}}, \code{\link{PikSTPPS}}, \code{\link{PikHol}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' Pik <- PikPPS(n, Employees) +#' # Check: sum equals n +#' sum(Pik) +#' # All values are valid probabilities +#' all(Pik > 0 & Pik <= 1) -PikPPS<-function(n,x){ -pik<- n*x/sum(x) -while((sum(pik>1))!=0){ -s<-which(pik>=1) -new=(1:length(pik))[-s] -pik[s]=1 -txnew<-sum(x[s]) -for(k in new){ -pik[k]<- (n-length(s))*x[k]/(sum(x)-txnew) -} -} -pik +PikPPS <- function(n, x) { + pik <- n * x/sum(x) + while ((sum(pik > 1)) != 0) { + s <- which(pik >= 1) + new <- (1:length(pik))[-s] + pik[s] <- 1 + txnew <- sum(x[s]) + for (k in new) { + pik[k] <- (n - length(s)) * x[k]/(sum(x) - txnew) + } + } + pik } \ No newline at end of file diff --git a/R/Pikl.r b/R/Pikl.r index 77892b1..4eec0cd 100644 --- a/R/Pikl.r +++ b/R/Pikl.r @@ -1,20 +1,52 @@ #' @export +#' +#' @title +#' Second-Order Inclusion Probabilities +#' @description +#' Computes the matrix of second-order inclusion probabilities +#' \eqn{\pi_{kl} = P(k \in s \text{ and } l \in s)} for all pairs of units +#' in a finite population of size \code{N} under a fixed-size sampling design. +#' @return +#' An \code{N x N} matrix where entry \eqn{(k, l)} is the probability that +#' both units \eqn{k} and \eqn{l} are included in the same sample. Diagonal +#' entries \eqn{(k,k)} equal the first-order inclusion probability \eqn{\pi_k}. +#' @details +#' The second-order inclusion probabilities are needed to compute the exact +#' Horvitz-Thompson variance estimator and the Sen-Yates-Grundy variance +#' estimator. This function enumerates the full sampling support via +#' \code{\link{Ik}} and is therefore only feasible for small populations +#' (\code{N <= 15}). +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Keep small (recommended \code{N <= 15}) due to +#' the combinatorial enumeration of all possible samples. +#' @param n Sample size. +#' @param p Vector of probabilities for each possible sample in the support. +#' Must sum to 1. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Pik}}, \code{\link{Deltakl}}, \code{\link{VarHT}} +#' +#' @examples +#' # Population N = 5, sample size n = 2 +#' N <- 5 +#' n <- 2 +#' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +#' pi2 <- Pikl(N, n, p) +#' pi2 -Pikl <- function(N,n,p){ -# The support -Sam <- Ik(N,n) -# Two columns for index k and index l -Ind <- OrderWR(N,2) -# Creation of the indicator vectors k and l -K <- matrix(c(Sam[,Ind]),ncol=2) -L <- t(t(K[,1])*K[,2]) -# Vectors of indicators k and l -# The first column is I11, the second is I12, etc.. -Ikl <- matrix(c(L),ncol=nrow(Ind)) -M <- p*Ikl -#Sum of the probabilities by column -O <- apply(M,2,sum) -# Creation of the matrix Pikl -P <- matrix(c(O),ncol=N) -return(P) -} +Pikl <- function(N, n, p) { + Sam <- Ik(N, n) + Ind <- OrderWR(N, 2) + K <- matrix(c(Sam[, Ind]), ncol = 2) + L <- t(t(K[, 1]) * K[, 2]) + Ikl <- matrix(c(L), ncol = nrow(Ind)) + M <- p * Ikl + O <- apply(M, 2, sum) + P <- matrix(c(O), ncol = N) + return(P) +} \ No newline at end of file diff --git a/R/S.BE.r b/R/S.BE.r index 743fbf3..ac416ca 100644 --- a/R/S.BE.r +++ b/R/S.BE.r @@ -1,11 +1,55 @@ #' @export +#' +#' @title +#' Bernoulli Sampling +#' @description +#' Draws a Bernoulli sample from a finite population of size \code{N}. +#' Each unit is independently selected with the same inclusion probability +#' \code{prob}. +#' @return +#' A vector of length \code{N} where selected units contain their population +#' index and non-selected units contain \code{0}. +#' @details +#' The sample size under Bernoulli sampling is random, following a +#' Binomial(\code{N}, \code{prob}) distribution. To extract the selected +#' indices, use \code{sam[sam != 0]}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param prob Scalar. Inclusion probability, must satisfy \code{0 < prob <= 1}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.BE}}, \code{\link{S.PO}}, \code{\link{S.SI}} +#' +#' @examples +#' # Population of size N = 100, inclusion probability 10% +#' N <- 100 +#' prob <- 0.1 +#' sam <- S.BE(N, prob) +#' +#' # Extract selected indices +#' selected <- sam[sam != 0] +#' length(selected) # random, around 10 +#' +#' # Using Lucy data +#' data('Lucy') +#' N <- nrow(Lucy) +#' prob <- 0.05 +#' sam <- S.BE(N, prob) +#' sam <- sam[sam != 0] +#' y <- data.frame(Income = Lucy$Income[sam]) +#' E.BE(y, prob) -S.BE<-function(N,prob){ -sam<-matrix(0,N,1) -U<-runif(N) -for(k in 1:N){ -if(U[k]<=prob) -sam[k]<-k - } -return(sam) +S.BE <- function(N, prob) { + sam <- matrix(0, N, 1) + U <- runif(N) + for (k in 1:N) { + if (U[k] <= prob) + sam[k] <- k + } + return(sam) } \ No newline at end of file diff --git a/R/S.PO.r b/R/S.PO.r index 3819c16..1c7c6d3 100644 --- a/R/S.PO.r +++ b/R/S.PO.r @@ -1,11 +1,48 @@ #' @export +#' +#' @title +#' Poisson Sampling +#' @description +#' Draws a Poisson sample from a finite population of size \code{N}. +#' Each unit \eqn{k} is independently selected with its own inclusion +#' probability \eqn{\pi_k}. +#' @return +#' A vector of length \code{N} where selected units contain their population +#' index and non-selected units contain \code{0}. +#' @details +#' Poisson sampling is a generalisation of Bernoulli sampling that allows +#' unequal inclusion probabilities. The sample size is random. To extract +#' the selected indices, use \code{sam[sam != 0]}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param Pik Vector of length \code{N} containing the first-order inclusion +#' probability for each unit in the population. Values must be in \code{(0, 1]}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.PO}}, \code{\link{PikPPS}}, \code{\link{S.piPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' Pik <- PikPPS(n, Employees) +#' sam <- S.PO(N, Pik) +#' sam <- sam[sam != 0] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.PO(y, Pik[sam]) -S.PO<-function(N,Pik){ -sam<-matrix(0,N,1) -U<-runif(N) -for(k in 1:N){ -if(U[k]<=Pik[k]) -sam[k]<-k - } -return(sam) +S.PO <- function(N, Pik) { + sam <- matrix(0, N, 1) + U <- runif(N) + for (k in 1:N) { + if (U[k] <= Pik[k]) + sam[k] <- k + } + return(sam) } \ No newline at end of file diff --git a/R/S.PPS.r b/R/S.PPS.r index 157011d..43334ae 100644 --- a/R/S.PPS.r +++ b/R/S.PPS.r @@ -1,14 +1,53 @@ #' @export +#' +#' @title +#' Probability Proportional to Size With-Replacement Sampling +#' @description +#' Draws a with-replacement sample of size \code{m} from a finite population +#' using probabilities proportional to an auxiliary size variable \code{x}. +#' @return +#' A matrix with \code{m} rows and two columns: +#' \itemize{ +#' \item Column 1 (\code{sam}): population indices of the selected units. +#' \item Column 2 (\code{pk}): selection probability of each draw. +#' } +#' @details +#' At each draw, unit \eqn{k} is selected with probability +#' \eqn{p_k = x_k / \sum x}. Since sampling is with replacement, the same +#' unit may appear more than once. Use \code{\link{E.PPS}} or \code{\link{HH}} +#' to estimate population totals from this sample. +#' @author Hugo Andres Gutierrez Rojas +#' @param m Number of draws (sample size with replacement). +#' @param x Vector of length \code{N} containing positive auxiliary size +#' values for each unit in the population. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.PPS}}, \code{\link{HH}}, \code{\link{S.piPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' m <- 400 +#' res <- S.PPS(m, Employees) +#' sam <- res[, 1] +#' pk <- res[, 2] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.PPS(y, pk) -S.PPS<-function(m,x){ -N<-length(x) -pk<-x/sum(x) -cumpk<-cumsum(pk) -U<-runif(m) -ints<-cbind(c(0,cumpk[-N]),cumpk) -sam<-rep(0,m) -for(i in 1:m){ - sam[i]<-which(U[i]>ints[,1] & U[i] ints[, 1] & U[i] < ints[, 2]) + } + return(cbind(sam, pk[sam])) +} \ No newline at end of file diff --git a/R/S.SI.r b/R/S.SI.r index 52e6b7e..a2121fb 100644 --- a/R/S.SI.r +++ b/R/S.SI.r @@ -1,15 +1,56 @@ #' @export +#' +#' @title +#' Simple Random Sampling Without Replacement +#' @description +#' Draws a simple random sample of size \code{n} without replacement from a +#' finite population of size \code{N} using the sequential algorithm of +#' Fan, Muller and Rezucha (1962). +#' @return +#' A vector of length \code{N} where selected units contain their population +#' index and non-selected units contain \code{0}. +#' @details +#' The sequential algorithm selects units one at a time by comparing a uniform +#' random variate with the conditional inclusion probability at each step, +#' ensuring exactly \code{n} units are selected. To extract the selected +#' indices, filter out the zeros: \code{sam[sam != 0]}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param n Sample size. Must satisfy \code{n <= N}. +#' @param e Optional vector of \code{N} uniform random variates in \code{(0,1)}. +#' If omitted, \code{runif(N)} is used. Useful for reproducibility or +#' coordinated sampling. +#' +#' @references +#' Fan, C.T., Muller, M.E. and Rezucha, I. (1962). Development of sampling +#' plans by using sequential (item by item) selection techniques and digital +#' computers. \emph{Journal of the American Statistical Association}, +#' 57(298), 387-402.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.SI}}, \code{\link{S.STSI}}, \code{\link{S.SY}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' sam <- sam[sam != 0] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.SI(N, n, y) -S.SI<-function(N,n,e=runif(N)) -{ -c<-matrix(0,N,1) -dec<-matrix(0,N,1) -sam<-matrix(0,N,1) -for(k in 1:N){ - c[k]<-(n-dec[k])/(N-k+1) - if(e[k] +#' @param S Vector of length \code{N} identifying the stratum membership of +#' each unit in the population. +#' @param x Vector of length \code{N} containing positive auxiliary size +#' values for each unit in the population. +#' @param mh Integer vector of length \code{H} specifying the number of +#' draws within each stratum. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.PPS}}, \code{\link{S.STpiPS}}, \code{\link{E.STPPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' mh <- c(20, 30, 50) +#' res <- S.STPPS(Level, Employees, mh) +#' head(res) +#' sam <- res$sam +#' pk <- res$pk +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.STPPS(y, pk, mh, Level[sam]) -S.STPPS<-function(S,x,mh) -{ -S<-as.factor(S) -S<-as.factor(as.integer(S)) -cum<-cumsum(mh) -sam<-matrix(0,sum(mh)) -pk<-matrix(0,sum(mh)) - -for(k in 1: length(mh)) -{ -h<-which(S==k) -Nh<-length(x[h]) -pkh<-x[h]/sum(x[h]) -cumpk<-cumsum(pkh) -U<-runif(mh[k]) -ints<-cbind(c(0,cumpk[-Nh]),cumpk) -sam.h<-rep(0,mh[k]) -pk.h<-rep(0,mh[k]) - -for(i in 1:mh[k]){ - sam.h[i]<-which(U[i]>ints[,1] & U[i]1){ -sam[(cum[k-1]+1):(cum[k])]<-h[sam.h] -pk[(cum[k-1]+1):(cum[k])]<-pk.h -} - -} -total<-data.frame(sam,pk) -total +S.STPPS <- function(S, x, mh) { + S <- as.factor(S) + S <- as.factor(as.integer(S)) + cum <- cumsum(mh) + sam <- matrix(0, sum(mh)) + pk <- matrix(0, sum(mh)) + for (k in 1:length(mh)) { + h <- which(S == k) + Nh <- length(x[h]) + pkh <- x[h]/sum(x[h]) + cumpk <- cumsum(pkh) + U <- runif(mh[k]) + ints <- cbind(c(0, cumpk[-Nh]), cumpk) + sam.h <- rep(0, mh[k]) + for (i in 1:mh[k]) { + sam.h[i] <- which(U[i] > ints[, 1] & U[i] < ints[, 2]) + } + pk.h <- pkh[sam.h] + if (k == 1) { + sam[1:mh[k]] <- h[sam.h] + pk[1:mh[k]] <- pk.h + } + if (k > 1) { + sam[(cum[k-1]+1):(cum[k])] <- h[sam.h] + pk[(cum[k-1]+1):(cum[k])] <- pk.h + } + } + data.frame(sam, pk) } \ No newline at end of file diff --git a/R/S.STSI.r b/R/S.STSI.r index b2b366a..bdd26a2 100644 --- a/R/S.STSI.r +++ b/R/S.STSI.r @@ -1,21 +1,55 @@ #' @export +#' +#' @title +#' Stratified Simple Random Sampling Without Replacement +#' @description +#' Draws a stratified simple random sample without replacement from a finite +#' population. Within each stratum, units are selected by simple random +#' sampling without replacement. +#' @return +#' A sorted vector of population indices of the selected units, of length +#' \code{sum(nh)}. +#' @details +#' The function selects \code{nh[h]} units from stratum \eqn{h} using +#' \code{base::sample}, and returns all selected indices sorted in ascending +#' order. Use \code{\link{E.STSI}} to estimate population totals from this +#' sample. +#' @author Hugo Andres Gutierrez Rojas +#' @param S Vector of length \code{N} identifying the stratum membership of +#' each unit in the population. +#' @param Nh Integer vector of length \code{H} with the population size of +#' each stratum. +#' @param nh Integer vector of length \code{H} with the sample size of each +#' stratum. Must satisfy \code{nh[h] <= Nh[h]} for all \code{h}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.STSI}}, \code{\link{S.SI}}, \code{\link{S.STpiPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' Nh <- as.numeric(table(Level)) +#' nh <- c(70, 100, 200) +#' sam <- S.STSI(Level, Nh, nh) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.STSI(Level[sam], Nh, nh, y) -S.STSI<-function(S,Nh,nh) -{ -S<-as.factor(S) -S<-as.factor(as.integer(S)) -cum<-cumsum(nh) -sam<-matrix(0,sum(nh)) -for(k in 1: length(nh)){ -h<-which(S==k) -sam.h<-sample(Nh[k],nh[k]) -if(k==1){ -sam[1:nh[k]]<-h[sam.h] -} -if(k>1){ -sam[(cum[k-1]+1):(cum[k])]<-h[sam.h] -} -} -sort(sam) -} - +S.STSI <- function(S, Nh, nh) { + S <- as.factor(S) + S <- as.factor(as.integer(S)) + cum <- cumsum(nh) + sam <- matrix(0, sum(nh)) + for (k in 1:length(nh)) { + h <- which(S == k) + sam.h <- sample(Nh[k], nh[k]) + if (k == 1) sam[1:nh[k]] <- h[sam.h] + if (k > 1) sam[(cum[k-1]+1):(cum[k])] <- h[sam.h] + } + sort(sam) +} \ No newline at end of file diff --git a/R/S.STpiPS.R b/R/S.STpiPS.R index 5eb33ac..e6b7c46 100644 --- a/R/S.STpiPS.R +++ b/R/S.STpiPS.R @@ -1,25 +1,69 @@ #' @export +#' +#' @title +#' Stratified Probability Proportional to Size Without-Replacement Sampling +#' @description +#' Draws a stratified sample where within each stratum units are selected +#' using a probability proportional to size without-replacement (piPS) design. +#' @return +#' A matrix with \code{sum(nh)} rows and two columns, sorted by population +#' index: +#' \itemize{ +#' \item Column 1: population indices of the selected units. +#' \item Column 2: first-order inclusion probabilities of the selected units. +#' } +#' @details +#' Within each stratum \eqn{h}, the function calls \code{\link{S.piPS}} to +#' draw \code{nh[h]} units with probabilities proportional to \code{x}. +#' The global population indices are preserved in the output. +#' @author Hugo Andres Gutierrez Rojas +#' @param S Vector of length \code{N} identifying the stratum membership of +#' each unit in the population. +#' @param x Vector of length \code{N} containing positive auxiliary size +#' values for each unit in the population. +#' @param nh Integer vector of length \code{H} specifying the sample size +#' within each stratum. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.piPS}}, \code{\link{S.STSI}}, \code{\link{E.STpiPS}}, +#' \code{\link{PikSTPPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n1 <- 70; n2 <- 100; n3 <- 200 +#' nh <- c(n1, n2, n3) +#' res <- S.STpiPS(Level, Employees, nh) +#' head(res) +#' sam <- res[, 1] +#' Pik <- res[, 2] +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.STpiPS(y, Pik, Level[sam]) -S.STpiPS<-function(S, x, nh) -{ - S<-as.factor(S) - S<-as.factor(as.integer(S)) - res<-matrix(NA, nrow = sum(nh), ncol=2) - cum<-cumsum(nh) - - for(k in 1: length(nh)){ - h <- which(S==k) +S.STpiPS <- function(S, x, nh) { + S <- as.factor(S) + S <- as.factor(as.integer(S)) + res <- matrix(NA, nrow = sum(nh), ncol = 2) + cum <- cumsum(nh) + for (k in 1:length(nh)) { + h <- which(S == k) res.h <- S.piPS(nh[k], x[h]) - sam.h <- res.h[,1] - pik.h <- res.h[,2] - if(k==1){ - res[1:nh[k],1]<-h[sam.h] - res[1:nh[k],2]<-pik.h + sam.h <- res.h[, 1] + pik.h <- res.h[, 2] + if (k == 1) { + res[1:nh[k], 1] <- h[sam.h] + res[1:nh[k], 2] <- pik.h } - if(k>1){ - res[(cum[k-1]+1):(cum[k]),1]<-h[sam.h] - res[(cum[k-1]+1):(cum[k]),2]<-pik.h + if (k > 1) { + res[(cum[k-1]+1):(cum[k]), 1] <- h[sam.h] + res[(cum[k-1]+1):(cum[k]), 2] <- pik.h } } res[order(res[, 1]), ] -} +} \ No newline at end of file diff --git a/R/S.SY.r b/R/S.SY.r index 57a4f24..571d4df 100644 --- a/R/S.SY.r +++ b/R/S.SY.r @@ -1,16 +1,51 @@ #' @export +#' +#' @title +#' Systematic Sampling +#' @description +#' Draws a systematic sample from a finite population of size \code{N} using +#' a fixed sampling interval \code{a}. A random start \code{r} is chosen +#' uniformly from \code{1} to \code{a}, and every \code{a}-th unit thereafter +#' is selected. +#' @return +#' A vector containing the population indices of the selected units. +#' @details +#' The random start \code{r} is drawn from \code{sample(a, 1)}, and then +#' units \eqn{r, r+a, r+2a, \ldots} are selected. If \code{N} is not a +#' multiple of \code{a}, the sample size varies by one unit depending on the +#' random start. Use \code{\link{E.SY}} to estimate population totals. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param a Sampling interval (skip). The expected sample size is +#' approximately \code{N/a}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.SY}}, \code{\link{S.SI}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' a <- 10 +#' sam <- S.SY(N, a) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.SY(N, a, y) -S.SY<-function (N, a) -{ - r <- sample(a, 1) - c <- N - a * floor(N/a) - if (r <= c) - n <- floor((N/a)) + 1 - else n <- floor(N/a) - sam <- matrix(0, n, 1) - for (k in 0:n) { - sam[k] <- r + (a * (k - 1)) - } - sam -} - +S.SY <- function(N, a) { + r <- sample(a, 1) + c <- N - a * floor(N/a) + if (r <= c) + n <- floor((N/a)) + 1 + else + n <- floor(N/a) + sam <- matrix(0, n, 1) + for (k in 1:n) { + sam[k] <- r + (a * (k - 1)) + } + sam +} \ No newline at end of file diff --git a/R/S.WR.r b/R/S.WR.r index 4097b64..00a317f 100644 --- a/R/S.WR.r +++ b/R/S.WR.r @@ -1,22 +1,54 @@ #' @export +#' +#' @title +#' Simple Random Sampling With Replacement +#' @description +#' Draws a simple random sample of size \code{m} with replacement from a +#' finite population of size \code{N}. Returns the frequency of selection +#' for each unit drawn at least once. +#' @return +#' A vector of population indices of length \code{m}, where each element is +#' the index of a selected unit. Units may appear more than once. +#' @details +#' The number of times each unit is selected follows a multinomial +#' distribution with equal probabilities \eqn{1/N}. The function uses a +#' sequential binomial draw approach. Use \code{\link{E.WR}} to estimate +#' population totals. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param m Number of draws (sample size with replacement). +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.WR}}, \code{\link{S.SI}}, \code{\link{S.PPS}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' m <- 400 +#' sam <- S.WR(N, m) +#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +#' E.WR(N, m, y) -S.WR<-function(N,m){ -nk<-rep(0,N) - for(k in 1:N){ - suma<-sum(nk) - nk[k]<-rbinom(1,(m-suma),(1/(N-k+1))) - } -x<-which(nk>0) -w<-nk[x] -sam<-rep(x[1],w[1]) - -if(length(x)==1){ -return(sam)} - -if(length(x)>1){ -for(i in 2:length(x)){ -sam<-c(sam,rep(x[i],w[i])) - } -} -sam +S.WR <- function(N, m) { + nk <- rep(0, N) + for (k in 1:N) { + suma <- sum(nk) + nk[k] <- rbinom(1, (m - suma), (1/(N - k + 1))) + } + x <- which(nk > 0) + w <- nk[x] + sam <- rep(x[1], w[1]) + if (length(x) == 1) return(sam) + if (length(x) > 1) { + for (i in 2:length(x)) { + sam <- c(sam, rep(x[i], w[i])) + } + } + sam } \ No newline at end of file diff --git a/R/Support.r b/R/Support.r index b41f5ed..9c904e6 100644 --- a/R/Support.r +++ b/R/Support.r @@ -1,15 +1,50 @@ #' @export #' @import stats +#' +#' @title +#' Sampling Support for Fixed-Size Without-Replacement Designs +#' @description +#' Enumerates all possible samples of size \code{n} from a population of +#' size \code{N}, returning the complete sampling support as a matrix. +#' @return +#' A matrix with \code{choose(N, n)} rows and \code{n} columns. Each row +#' contains the indices (or labels if \code{ID} is provided) of the units +#' in one possible sample. Samples are listed in lexicographic order. +#' @details +#' This function uses a combinatorial algorithm to enumerate all +#' \code{choose(N, n)} subsets of size \code{n} from \eqn{\{1, \ldots, N\}}. +#' It is intended for small populations only. For \code{N > 15} it becomes +#' very slow. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Recommended \code{N <= 15}. +#' @param n Sample size. +#' @param ID Optional vector of population labels of length \code{N}. +#' If provided, labels replace integer indices in the output. +#' If \code{FALSE} (default), integer indices are returned. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Ik}}, \code{\link{SupportWR}}, \code{\link{SupportRS}} +#' +#' @examples +#' # All samples of size 2 from a population of 5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' Support(N = 5, n = 2, ID = U) +#' +#' # Integer indices only +#' Support(N = 5, n = 2) Support <- function(N, n, ID = FALSE) { - m <- matrix(0, choose(N, n), n) + m <- matrix(0, choose(N, n), n) sam <- matrix(0, choose(N, n), n) - for (i in 1:n) - { + for (i in 1:n) { a <- 0 t <- i - for (r in 1:choose(N, n)) - { + for (r in 1:choose(N, n)) { a <- a + 1 B <- choose(N - t, n - i) if (a > B) { @@ -19,14 +54,10 @@ Support <- function(N, n, ID = FALSE) { if (t > N - n + i) { t <- m[r, i - 1] + 1 } - m[r, i] <- t + m[r, i] <- t sam[r, i] <- ID[t] } } - if (is.logical(ID) == TRUE) { - return(m) - } - else { - return(sam) - } + if (is.logical(ID) == TRUE) return(m) + else return(sam) } \ No newline at end of file diff --git a/R/SupportRS.r b/R/SupportRS.r index 464aa1b..cdb035a 100644 --- a/R/SupportRS.r +++ b/R/SupportRS.r @@ -1,15 +1,47 @@ #' @export +#' +#' @title +#' Complete Sampling Support for All Sample Sizes +#' @description +#' Enumerates all possible non-empty subsets of a population of size \code{N}, +#' covering all sample sizes from 1 to \code{N}. The result includes the +#' empty set as the first row. +#' @return +#' A matrix with \eqn{2^N} rows and \code{N} columns. Each row is one subset, +#' with \code{NA} used as padding for subsets smaller than \code{N}. The first +#' row represents the empty set (all zeros). +#' @details +#' This function stacks the outputs of \code{\link{Support}} for all sample +#' sizes \eqn{n = 1, \ldots, N}. It is only feasible for small populations +#' (\code{N <= 10}) due to exponential growth. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Recommended \code{N <= 10}. +#' @param ID Optional vector of population labels of length \code{N}. +#' If provided, labels replace integer indices in the output. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{Support}}, \code{\link{IkRS}} +#' +#' @examples +#' # Complete support for N = 3 +#' SupportRS(3) +#' # 2^3 = 8 rows -SupportRS <- function(N, ID=FALSE){ -sam <- matrix(NA, ncol=N, nrow=1) -for(k in 1:N){ -sam<-rbind(sam, cbind(Support(N,k),matrix(NA,ncol=N-k, nrow=choose(N,k)))) -} -if (is.logical(ID) == TRUE){ -return(sam) -} -else{ -sam<-matrix(ID[SupportRS(N)],nrow=2^N) -return(sam) -} +SupportRS <- function(N, ID = FALSE) { + sam <- matrix(NA, ncol = N, nrow = 1) + for (k in 1:N) { + sam <- rbind(sam, + cbind(Support(N, k), + matrix(NA, ncol = N - k, nrow = choose(N, k)))) + } + if (is.logical(ID) == TRUE) return(sam) + else { + sam <- matrix(ID[SupportRS(N)], nrow = 2^N) + return(sam) + } } \ No newline at end of file diff --git a/R/SupportWR.r b/R/SupportWR.r index 67f93bb..9601b22 100644 --- a/R/SupportWR.r +++ b/R/SupportWR.r @@ -1,36 +1,66 @@ #' @export +#' +#' @title +#' Sampling Support for With-Replacement Designs +#' @description +#' Enumerates all distinct unordered outcomes (multisets) of size \code{m} +#' drawn with replacement from a population of size \code{N}. +#' @return +#' A matrix with \code{choose(N+m-1, m)} rows and \code{m} columns. Each +#' row contains the (sorted) indices of one possible unordered outcome. +#' If \code{ID} is provided, population labels replace indices. +#' @details +#' The number of distinct unordered with-replacement outcomes of size \code{m} +#' from \code{N} units is \eqn{\binom{N+m-1}{m}}. This is much smaller than +#' the \eqn{N^m} ordered outcomes. The algorithm uses a nested loop to +#' generate all non-decreasing sequences of length \code{m} from +#' \eqn{\{1, \ldots, N\}}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param m Number of draws (sample size with replacement). +#' @param ID Optional vector of population labels of length \code{N}. +#' If \code{FALSE} (default), integer indices are returned. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{IkWR}}, \code{\link{nk}}, \code{\link{p.WR}} +#' +#' @examples +#' # All unordered outcomes: N = 3, m = 2 +#' SupportWR(N = 3, m = 2) +#' # choose(3+2-1, 2) = choose(4,2) = 6 rows -SupportWR <- function(N, m, ID=FALSE){ -S=0 -a=rep(1,m) -P1<-a -S=S+1 -k=m -while(k>0){ -while(a[k]1) -k=k-1 -if(a[k] 0) { + while (a[k] < N) { + a[k] <- a[k] + 1 + P1 <- rbind(P1, a) + S <- S + 1 + } + if (k > 1) k <- k - 1 + if (a[k] < N) { + a[k] <- a[k] + 1 + k1 <- k + 1 + a[k1:m] <- a[k] + P1 <- rbind(P1, a) + S <- S + 1 + k <- m + } else { + if (k == 1) k <- 0 + } + } + nr <- choose(N + m - 1, m) + P1 <- matrix(P1, nrow = nr) + sam <- matrix(ID[P1], nrow = nr) + if (is.logical(ID) == TRUE) return(P1) + else return(sam) +} \ No newline at end of file diff --git a/R/T.SIC.r b/R/T.SIC.r index 901d7cb..ac22686 100644 --- a/R/T.SIC.r +++ b/R/T.SIC.r @@ -1,26 +1,64 @@ #' @export +#' +#' @title +#' Cluster Totals for Single-Stage Cluster Sampling +#' @description +#' Computes the total of each variable of interest within each cluster +#' (Primary Sampling Unit) in a single-stage cluster sample. +#' @return +#' A matrix with one row per cluster and one column per variable of interest +#' (plus a first column \code{Ni} with the cluster size). Row names are the +#' cluster labels. +#' @details +#' This function aggregates the sample data by cluster, producing the cluster- +#' level totals needed for estimation under single-stage cluster sampling. +#' The output can be passed directly to \code{\link{E.1SI}} or \code{\link{E.SI}} +#' treating each cluster total as an observation. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame containing the values of the +#' variables of interest for every unit in the sample. +#' @param Cluster Vector identifying the cluster (PSU) membership of each +#' unit in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.1SI}}, \code{\link{E.2SI}} +#' +#' @examples +#' library(dplyr) +#' data('BigCity') +#' UI <- levels(as.factor(BigCity$PSU)) +#' NI <- length(UI) +#' nI <- 10 +#' sam <- S.SI(NI, nI) +#' sampleI <- UI[sam[sam != 0]] +#' CityI <- BigCity[BigCity$PSU %in% sampleI, ] +#' y <- data.frame(Income = CityI$Income, +#' Expenditure = CityI$Expenditure) +#' cluster <- CityI$PSU +#' T.SIC(y, cluster) -T.SIC<-function(y,Cluster){ - - Cluster<-as.factor(Cluster) - y<-cbind(1,y) - y<-as.data.frame(y) +T.SIC <- function(y, Cluster) { + Cluster <- as.factor(Cluster) + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "Ni" - - nI<-length(levels(Cluster)) - - Total<-matrix(NA,nrow=nI,ncol=dim(y)[2],) - rownames(Total)<-levels(Cluster) - colnames(Total)<-names(y) - Cluster<-as.factor(as.integer(Cluster)) - - for(k in 1: nI){ - e<-which(Cluster==k) - ye<-y[e,] - ye<-as.matrix(ye) - tye<-colSums(ye) - Total[k,]<-tye + nI <- length(levels(Cluster)) + Total <- matrix(NA, nrow = nI, ncol = dim(y)[2]) + rownames(Total) <- levels(Cluster) + colnames(Total) <- names(y) + Cluster <- as.factor(as.integer(Cluster)) + for (k in 1:nI) { + e <- which(Cluster == k) + ye <- y[e, ] + ye <- as.matrix(ye) + tye <- colSums(ye) + Total[k, ] <- tye } - Total<-as.matrix(Total) + Total <- as.matrix(Total) return(Total) } \ No newline at end of file diff --git a/R/VarHT.r b/R/VarHT.r index 66a178f..33a0575 100644 --- a/R/VarHT.r +++ b/R/VarHT.r @@ -1,13 +1,53 @@ #' @export +#' +#' @title +#' Exact Variance of the Horvitz-Thompson Estimator +#' @description +#' Computes the exact variance of the Horvitz-Thompson estimator of the +#' population total for a given fixed-size without-replacement sampling design, +#' using the full sampling support. +#' @return +#' A scalar: the exact variance of the Horvitz-Thompson estimator +#' \eqn{V(\hat{t}_{y,\pi})}. +#' @details +#' The exact Horvitz-Thompson variance is: +#' \deqn{V(\hat{t}_{y,\pi}) = \sum_{k=1}^N \sum_{l=1}^N \Delta_{kl} +#' \frac{y_k}{\pi_k} \frac{y_l}{\pi_l}} +#' where \eqn{\Delta_{kl} = \pi_{kl} - \pi_k \pi_l}. This requires +#' enumerating the full support and is only feasible for small populations +#' (\code{N <= 15}). +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector of length \code{N} with the population values of the +#' variable of interest. +#' @param N Population size. Recommended \code{N <= 15}. +#' @param n Sample size. +#' @param p Vector of probabilities for each possible sample in the support. +#' Must sum to 1. +#' +#' @references +#' Horvitz, D.G. and Thompson, D.J. (1952). A generalization of sampling +#' without replacement from a finite universe. +#' \emph{Journal of the American Statistical Association}, 47, 663-685.\cr +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer. +#' +#' @seealso \code{\link{Deltakl}}, \code{\link{VarSYGHT}}, \code{\link{HT}} +#' +#' @examples +#' N <- 5 +#' n <- 2 +#' y <- c(32, 34, 46, 89, 35) +#' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +#' VarHT(y, N, n, p) -VarHT<-function(y, N, n, p){ -Ind <- Ik(N,n) -pi1 <- as.matrix(Pik(p, Ind)) -pi2 <- Pikl(N,n,p) -Delta <- Deltakl(N,n,p) -y <- t(as.matrix(y)) -ykylexp <- t(y/pi1)%*%(y/pi1) -A <- (Delta)*(ykylexp) -Var <- sum(A) -return(Var) +VarHT <- function(y, N, n, p) { + Ind <- Ik(N, n) + pi1 <- as.matrix(Pik(p, Ind)) + pi2 <- Pikl(N, n, p) + Delta <- Deltakl(N, n, p) + y <- t(as.matrix(y)) + ykylexp <- t(y/pi1) %*% (y/pi1) + A <- (Delta) * (ykylexp) + Var <- sum(A) + return(Var) } \ No newline at end of file diff --git a/R/Wk.r b/R/Wk.r index eabbb44..0d57563 100644 --- a/R/Wk.r +++ b/R/Wk.r @@ -1,16 +1,60 @@ #' @export +#' +#' @title +#' GREG Generalised Weights +#' @description +#' Computes the generalised regression (GREG) weights for each unit in the +#' sample. These weights incorporate both the sampling design weights and a +#' calibration adjustment based on known population totals of auxiliary +#' variables. +#' @return +#' A numeric vector of length \code{n} with the GREG weight for each unit +#' in the sample. +#' @details +#' The GREG weight for unit \eqn{k} is: +#' \deqn{w_k = \frac{1}{\pi_k} + \mathbf{x}_k^T +#' \left(\sum_s \frac{v_k \mathbf{x}_k \mathbf{x}_k^T}{\pi_k}\right)^{-1} +#' (\mathbf{t}_x - \hat{\mathbf{t}}_{x,\pi})} +#' where \eqn{v_k = 1/(\pi_k c_k)} and \eqn{c_k} is a variance-stabilising +#' constant. The GREG estimator is then \eqn{\hat{t}_{GREG} = \sum_s w_k y_k}. +#' @author Hugo Andres Gutierrez Rojas +#' @param x Vector or matrix of auxiliary variables observed in the sample. +#' @param tx Vector of known population totals of the auxiliary variables. +#' @param Pik Vector of first-order inclusion probabilities for each unit +#' in the sample. +#' @param ck Vector of variance-stabilising constants. Typically \code{ck = 1} +#' (homoscedastic) or \code{ck = x} (heteroscedastic). +#' @param b0 Logical. If \code{TRUE}, an intercept column is prepended to +#' \code{x}. Default is \code{FALSE}. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{GREG.SI}}, \code{\link{E.Beta}} +#' +#' @examples +#' data('Lucy') +#' attach(Lucy) +#' N <- nrow(Lucy) +#' n <- 400 +#' sam <- S.SI(N, n) +#' Pik <- rep(n/N, n) +#' x <- as.matrix(Employees[sam]) +#' tx <- sum(Employees) +#' ck <- rep(1, n) +#' wk <- Wk(x, tx, Pik, ck) +#' # Check calibration: weighted sum of x equals tx +#' sum(wk * x) -Wk<-function(x,tx,Pik,ck,b0=FALSE){ - - if (b0 == TRUE){ - x<-as.matrix(cbind(1,x))} - if (b0 == FALSE){ - x<-as.matrix(x)} - - tx<-as.matrix(tx) - txpi<-as.matrix(t(x)%*%(1/Pik)) - V<-1/(Pik*ck) - - Wk<-(1/Pik)+((V*x)%*%solve(t(V*x)%*%x)%*%(tx-txpi)) - return(Wk) -} +Wk <- function(x, tx, Pik, ck, b0 = FALSE) { + if (b0 == TRUE) x <- as.matrix(cbind(1, x)) + if (b0 == FALSE) x <- as.matrix(x) + tx <- as.matrix(tx) + txpi <- as.matrix(t(x) %*% (1/Pik)) + V <- 1/(Pik * ck) + result <- (1/Pik) + ((V * x) %*% solve(t(V * x) %*% x) %*% (tx - txpi)) + return(result) +} \ No newline at end of file diff --git a/R/kish_allocation.R b/R/kish_allocation.R new file mode 100644 index 0000000..f6b16cf --- /dev/null +++ b/R/kish_allocation.R @@ -0,0 +1,89 @@ +#' @export +#' +#' @title +#' Kish Allocation for Stratified Sampling +#' @description +#' Computes the optimal sample size allocation across strata using the +#' Kish (1992) compromise allocation method, which interpolates between +#' uniform and proportional allocation through a design effect parameter \code{I}. +#' +#' @param n Integer. Total desired sample size. +#' @param N_h Named numeric vector. Population sizes for each stratum +#' \eqn{h = 1, \ldots, H}. +#' @param I Non-negative numeric. Intraclass correlation coefficient (ICC) +#' or design effect parameter controlling the allocation: +#' \itemize{ +#' \item \code{I = 0} → Uniform allocation (equal sample per stratum). +#' \item \code{I = Inf} → Proportional allocation (proportional to \eqn{N_h}). +#' \item \code{0 < I < Inf} → Compromise between uniform and proportional. +#' \item Recommended value: \code{I = 0.5} (Kish, 1992). +#' } +#' +#' @return A named integer vector of length \eqn{H} with the allocated sample +#' sizes per stratum. The values sum to approximately \code{n} (rounding may +#' cause a difference of ±1). +#' +#' @details +#' The Kish compromise allocation assigns sample sizes as: +#' \deqn{ +#' n_h = n \cdot \frac{\sqrt{I \, W_h^2 + H^{-2}}} +#' {\sum_{h=1}^{H} \sqrt{I \, W_h^2 + H^{-2}}} +#' } +#' where \eqn{W_h = N_h / N} is the stratum weight and \eqn{H} is the number +#' of strata. This formulation nests two classical allocations as limiting +#' cases: when \eqn{I = 0} the numerator reduces to \eqn{1/H} (uniform), +#' and as \eqn{I \to \infty} it is dominated by \eqn{W_h} (proportional). +#' +#' @references +#' Kish, L. (1992). Weighting for unequal \eqn{P_i}. +#' \emph{Journal of Official Statistics}, 8(2), 183–200. +#' +#' @author Yury Vanessa Ochoa Montes +#' +#' @seealso +#' \code{\link{E.STSI}} for estimation under stratified sampling, +#' \code{\link{S.STSI}} for stratified simple random sampling. +#' +#' @examples +#' N_h <- c( +#' Corozal = 41847, +#' Orange_Walk = 48175, +#' Belize = 57658, +#' Cayo = 78473, +#' Stann_Creek = 31347, +#' Toledo = 31711 +#' ) +#' +#' # Uniform allocation (I = 0) +#' kish_allocation(n = 3096, N_h = N_h, I = 0) +#' +#' # Proportional allocation (I -> Inf) +#' kish_allocation(n = 3096, N_h = N_h, I = 1e6) +#' +#' # Kish recommended compromise (I = 0.5) +#' kish_allocation(n = 3096, N_h = N_h, I = 0.5) + +kish_allocation <- function(n, N_h, I = 0.5) { + + if (!is.numeric(n) || length(n) != 1L || n <= 0 || n != round(n)) + stop("`n` must be a single positive integer.", call. = FALSE) + + if (!is.numeric(N_h) || any(N_h <= 0)) + stop("`N_h` must be a numeric vector of positive stratum sizes.", call. = FALSE) + + if (!is.numeric(I) || length(I) != 1L || I < 0) + stop("`I` must be a single non-negative number.", call. = FALSE) + + if (n > sum(N_h)) + stop("`n` cannot exceed the total population size sum(N_h).", call. = FALSE) + + H <- length(N_h) + W_h <- N_h / sum(N_h) + + num <- sqrt(I * W_h^2 + 1 / H^2) + n_h <- round(n * num / sum(num)) + + if (!is.null(names(N_h))) names(n_h) <- names(N_h) + + n_h +} \ No newline at end of file diff --git a/R/nk.r b/R/nk.r index 3783684..b8890e6 100644 --- a/R/nk.r +++ b/R/nk.r @@ -1,16 +1,48 @@ #' @export +#' +#' @title +#' Frequency Matrix for With-Replacement Sampling +#' @description +#' Constructs the frequency matrix of the with-replacement sampling support +#' for a population of size \code{N} and \code{m} draws. Each row corresponds +#' to one possible outcome and each column to one population unit, with entry +#' \eqn{(s, k)} equal to the number of times unit \eqn{k} was selected in +#' outcome \eqn{s}. +#' @return +#' An integer matrix of dimension \code{choose(N+m-1, m) x N}, where entry +#' \eqn{(s, k)} is the frequency of unit \eqn{k} in outcome \eqn{s}. +#' @details +#' Unlike \code{\link{IkWR}}, which records only whether a unit was selected, +#' this function records how many times each unit was selected. This is needed +#' for with-replacement estimators based on selection frequencies. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. Keep small due to combinatorial growth. +#' @param m Number of draws (sample size with replacement). +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{IkWR}}, \code{\link{SupportWR}}, \code{\link{p.WR}} +#' +#' @examples +#' # Frequency matrix: N = 3 units, m = 2 draws +#' N <- 3 +#' m <- 2 +#' nk(N, m) -nk <- function(N, m) -{ -Q <- SupportWR(N, m, ID = FALSE) -I <- matrix(0, choose(N+m-1, m), N) -for (i in 1:m) { -for (j in 1:choose(N+m-1, m)) { -for (k in 1:N) { -if (Q[j, i] == k) -I[j, k] <- sum(as.double(Q[j,]==k)) -} -} -} -I +nk <- function(N, m) { + Q <- SupportWR(N, m, ID = FALSE) + I <- matrix(0, choose(N + m - 1, m), N) + for (i in 1:m) { + for (j in 1:choose(N + m - 1, m)) { + for (k in 1:N) { + if (Q[j, i] == k) + I[j, k] <- sum(as.double(Q[j, ] == k)) + } + } + } + I } \ No newline at end of file diff --git a/R/p.WR.r b/R/p.WR.r index ad03606..cd4a95e 100644 --- a/R/p.WR.r +++ b/R/p.WR.r @@ -1,12 +1,49 @@ #' @export +#' +#' @title +#' Sample Probabilities under With-Replacement Sampling +#' @description +#' Computes the probability of each possible outcome in the with-replacement +#' sampling support, given unit selection probabilities \code{pk}. +#' @return +#' A numeric vector of length \code{choose(N+m-1, m)} with the probability +#' of each distinct unordered outcome in the with-replacement support. +#' @details +#' For each distinct unordered outcome (multiset) in the support enumerated +#' by \code{\link{nk}}, the probability is computed as a multinomial +#' probability: +#' \deqn{p(s) = \frac{m!}{\prod_k n_k!} \prod_k p_k^{n_k}} +#' where \eqn{n_k} is the number of times unit \eqn{k} appears in outcome +#' \eqn{s} and \eqn{p_k} is the selection probability of unit \eqn{k}. +#' @author Hugo Andres Gutierrez Rojas +#' @param N Population size. +#' @param m Number of draws (sample size with replacement). +#' @param pk Vector of length \code{N} with selection probabilities for each +#' unit. Must sum to 1. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{nk}}, \code{\link{SupportWR}}, \code{\link{S.PPS}} +#' +#' @examples +#' # N = 3 units, m = 2 draws, equal probabilities +#' N <- 3 +#' m <- 2 +#' pk <- c(1/3, 1/3, 1/3) +#' p <- p.WR(N, m, pk) +#' sum(p) # must equal 1 -p.WR <- function(N, m, pk){ -p <- rep(0,N) -I <- nk(N,m) -N <- dim(I)[1] -for(i in 1:N){ -ni <- c(I[i,]) -p[i] <- dmultinom(ni, prob=pk) -} -p +p.WR <- function(N, m, pk) { + p <- rep(0, N) + I <- nk(N, m) + N <- dim(I)[1] + for (i in 1:N) { + ni <- c(I[i, ]) + p[i] <- dmultinom(ni, prob = pk) + } + p } \ No newline at end of file diff --git a/README.md b/README.md index 25efb61..5f42955 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,163 @@ # TeachingSampling + ### An R package that draws complex samples and estimates complex parameters -This is the version control site for the `TeachingSampling` package. This software allows you to select samples from the most common (but somehow complex) sampling schemes. Along with this feature, the software allows you to estimate parameters such as totals, means, ratios, coefficient regressions, percentiles, medians, etc. +`TeachingSampling` allows you to select samples from the most common probabilistic sampling designs and estimate complex parameters such as totals, means, ratios, regression coefficients, and quantiles. + +The package is based on: -The software is based on the book written by the author of the package. +> Gutierrez, H. A. (2009). *Estrategias de muestreo: diseño de encuestas y estimación de parámetros*. Editorial Universidad Santo Tomás. -Gutierrez, H. A. (2009), *Estrategias de muestreo: diseno de encuestas y estimacion de parametros*. Editorial Universidad Santo Tomas +--- ## Installation -First, you need to install the `devtools` R package +### Stable version from CRAN + +```r +install.packages("TeachingSampling") ``` + +### Development version from GitHub + +```r install.packages("devtools") +devtools::install_github("psirusteam/TeachingSampling") ``` -Then load the `devtools` R package -``` -library(devtools) -``` +--- -Finally type -``` -install_github("psirusteam/TeachingSampling") +## Functions + +### Sampling designs + +| Function | Description | +|---|---| +| `S.SI()` | Simple random sampling without replacement | +| `S.SY()` | Systematic sampling | +| `S.BE()` | Bernoulli sampling | +| `S.PO()` | Poisson sampling | +| `S.WR()` | Simple random sampling with replacement | +| `S.PPS()` | PPS sampling with replacement | +| `S.piPS()` | PPS sampling without replacement | +| `S.STSI()` | Stratified simple random sampling | +| `S.STPPS()` | Stratified PPS sampling with replacement | +| `S.STpiPS()` | Stratified PPS sampling without replacement | + +### Inclusion probabilities + +| Function | Description | +|---|---| +| `PikPPS()` | Inclusion probabilities proportional to size | +| `PikSTPPS()` | Inclusion probabilities for stratified PPS | +| `PikHol()` | Optimal inclusion probabilities (Holmberg) | +| `Pik()` | First-order inclusion probabilities from design | +| `Pikl()` | Second-order inclusion probabilities | + +### Estimation + +| Function | Description | +|---|---| +| `E.SI()` | Estimation under simple random sampling | +| `E.SY()` | Estimation under systematic sampling | +| `E.BE()` | Estimation under Bernoulli sampling | +| `E.PO()` | Estimation under Poisson sampling | +| `E.WR()` | Estimation under with-replacement sampling | +| `E.PPS()` | Hansen-Hurwitz estimator under PPS-WR | +| `E.piPS()` | HT estimator under piPS sampling | +| `E.STSI()` | Estimation under stratified SI | +| `E.STPPS()` | Estimation under stratified PPS-WR | +| `E.STpiPS()` | Estimation under stratified piPS | +| `E.1SI()` | Estimation under single-stage cluster sampling | +| `E.2SI()` | Estimation under two-stage SI sampling | +| `E.UC()` | Estimation using the Ultimate Cluster method | +| `E.Quantile()` | Weighted quantile estimation | +| `E.Trim()` | Weight trimming and redistribution | + +### Regression and calibration + +| Function | Description | +|---|---| +| `E.Beta()` | Regression coefficient estimation | +| `GREG.SI()` | Generalised regression estimator | +| `Wk()` | GREG calibration weights | +| `IPFP()` | Iterative proportional fitting (raking) | + +### Variance estimation + +| Function | Description | +|---|---| +| `VarHT()` | Exact Horvitz-Thompson variance | +| `VarSYGHT()` | HT and Sen-Yates-Grundy variance estimators | +| `HT()` | Horvitz-Thompson estimator | +| `Deltakl()` | Matrix of joint inclusion probability differences | + +### Sampling support (small populations) + +| Function | Description | +|---|---| +| `Support()` | Sampling support for SI designs | +| `SupportWR()` | Sampling support for WR designs | +| `SupportRS()` | Complete support for all sample sizes | +| `Ik()` | Sample membership indicator matrix | +| `IkWR()` | Frequency indicator matrix for WR sampling | +| `IkRS()` | Indicator matrix for all sample sizes | +| `OrderWR()` | Ordered WR sampling support | +| `nk()` | Frequency matrix for WR sampling | +| `p.WR()` | Sample probabilities under WR sampling | + +### Allocation + +| Function | Description | +|---|---| +| `kish_allocation()` | Kish compromise allocation for stratified sampling | + +### Utilities + +| Function | Description | +|---|---| +| `Domains()` | Domain indicator matrix | +| `T.SIC()` | Cluster totals for single-stage sampling | + +--- + +## Usage example + +```r +library(TeachingSampling) + +data("Lucy") +N <- nrow(Lucy) +n <- 400 + +# Draw a simple random sample without replacement +sam <- S.SI(N, n) +sam <- sam[sam != 0] + +# Estimate population totals +y <- data.frame(Income = Lucy$Income[sam], + Expenditure = Lucy$Expenditure[sam]) + +E.SI(N, n, y) ``` -## Author -This package is maintained by Andrés Gutiérrez. Email: hagutierrezro@gmail.com +--- + +## Authors + +**Hugo Andrés Gutiérrez Rojas** — Package author and maintainer +Email: hagutierrezro@gmail.com +GitHub: [@psirusteam](https://github.com/psirusteam) + +**Yury Vanessa Ochoa Montes** +Email: yury.ochoa@urosario.edu.co + +--- + +## Support + +- 📖 [Reference manual (CRAN)](https://cran.r-project.org/web/packages/TeachingSampling/TeachingSampling.pdf) +- [CRAN page](https://cran.r-project.org/web/packages/TeachingSampling) +- [Report an issue](https://github.com/psirusteam/TeachingSampling/issues) -### Support or Contact -Having trouble with the `TeachingSampling` package? Check out the [documentation](http://cran.r-project.org/web/packages/TeachingSampling/TeachingSampling.pdf) or [contact support](https://github.com/psirusteam). Comments, amends, and critics are very welcome. This is the [CRAN site](http://cran.r-project.org/web/packages/TeachingSampling) of the last stable version of the package. \ No newline at end of file +Comments, corrections, and suggestions are always welcome. diff --git a/man/BigCity.Rd b/man/BigCity.Rd deleted file mode 100644 index 3b7d5d8..0000000 --- a/man/BigCity.Rd +++ /dev/null @@ -1,52 +0,0 @@ -\name{BigCity} -\docType{data} -\alias{BigCity} -\title{Full Person-level Population Database} -\description{ -This data set corresponds to some socioeconomic variables from 150266 people of a city in a particular year. -} -\seealso{ -\code{\link{Lucy}, \link{BigLucy}} -} -\usage{data(BigCity)} -\format{ - \describe{ -\item{HHID}{The identifier of the household. It corresponds to an alphanumeric sequence (four letters and five digits).} -\item{PersonID}{The identifier of the person within the household. NOTE it is not a unique identifier of a person for the whole population. It corresponds to an alphanumeric sequence (five letters and two digits).} -\item{Stratum}{Households are located in geographic strata. There are 119 strata across the city.} -\item{PSU}{Households are clustered in cartographic segments defined as primary sampling units (PSU). There are 1664 PSU and they are nested within strata.} -\item{Zone}{Segments clustered within strata can be located within urban or rural areas along the city.} -\item{Sex}{Sex of the person.} -\item{Income}{Per capita monthly income.} -\item{Expenditure}{Per capita monthly expenditure.} -\item{Employment}{A person's employment status.} -\item{Poverty}{This variable indicates whether the person is poor or not. It depends on income.} -} -} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} - -\examples{ -data(BigCity) -attach(BigCity) - -estima <- data.frame(Income, Expenditure) -# The population totals -colSums(estima) -# Some parameters of interest -table(Poverty, Zone) -xtabs(Income ~ Poverty + Zone) -# Correlations among characteristics of interest -cor(estima) -# Some useful histograms -hist(Income) -hist(Expenditure) -# Some useful plots -boxplot(Income ~ Poverty) -barplot(table(Employment)) -pie(table(MaritalST)) -} -\keyword{datasets} diff --git a/man/BigLucy.rd b/man/BigLucy.rd deleted file mode 100644 index 29fb639..0000000 --- a/man/BigLucy.rd +++ /dev/null @@ -1,57 +0,0 @@ -\name{BigLucy} -\docType{data} -\alias{BigLucy} -\title{Full Business Population Database} -\description{ -This data set corresponds to some financial variables of 85396 industrial companies of a city in a particular fiscal year. -} -\seealso{ -\code{\link{Lucy}, \link{BigCity}} -} -\usage{data(BigLucy)} -\format{ - \describe{ -\item{ID}{The identifier of the company. It correspond to an alphanumeric sequence (two letters and three digits)} -\item{Ubication}{The address of the principal office of the company in the city} -\item{Level}{The industrial companies are discrimitnated according to the Taxes declared. -There are small, medium and big companies} -\item{Zone}{The country is divided by counties. A company belongs to a particular zone according to its cartographic location.} -\item{Income}{The total ammount of a company's earnings (or profit) in the previuos fiscal year. It is calculated by taking -revenues and adjusting for the cost of doing business} -\item{Employees}{The total number of persons working for the company in the previuos fiscal year} -\item{Taxes}{The total ammount of a company's income Tax} -\item{SPAM}{Indicates if the company uses the Internet and WEBmail options in order to make self-propaganda.} -\item{ISO}{Indicates if the company is certified by the International Organization for Standardization.} -\item{Years}{The age of the company.} -\item{Segments}{Cartographic segments by county. A segment comprises in average 10 companies located close to each other.} -} -} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} - -\examples{ -data(BigLucy) -attach(BigLucy) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -# The population totals -colSums(estima) -# Some parameters of interest -table(SPAM,Level) -xtabs(Income ~ Level+SPAM) -# Correlations among characteristics of interest -cor(estima) -# Some useful histograms -hist(Income) -hist(Taxes) -hist(Employees) -# Some useful plots -boxplot(Income ~ Level) -barplot(table(Level)) -pie(table(SPAM)) -} -\keyword{datasets} diff --git a/man/Deltakl.rd b/man/Deltakl.rd index b088ed4..4eadd6d 100644 --- a/man/Deltakl.rd +++ b/man/Deltakl.rd @@ -1,40 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Deltakl.r \name{Deltakl} \alias{Deltakl} -\title{Variance-Covariance Matrix of the Sample Membership Indicators for Fixed Size Without Replacement Sampling Designs} -\description{Computes the Variance-Covariance matrix of the sample membership indicators in the population given a -fixed sample size design} +\title{Matrix of Joint Inclusion Probability Differences} \usage{ Deltakl(N, n, p) } \arguments{ -\item{N}{Population size} -\item{n}{Sample size} -\item{p}{A vector containing the selection probabilities of a fixed size without replacement sampling design. The sum of the values of this vector must be one} +\item{N}{Population size. Recommended \code{N <= 15}.} + +\item{n}{Sample size.} + +\item{p}{Vector of probabilities for each possible sample in the support. +Must sum to 1.} } -\seealso{ -\code{\link{VarHT}, \link{Pikl}, \link{Pik}} +\value{ +An \code{N x N} matrix where entry \eqn{(k, l)} equals +\eqn{\pi_{kl} - \pi_k \pi_l}. Diagonal entries equal +\eqn{\pi_k(1 - \pi_k)}. } -\details{The \eqn{kl}th unit of the Variance-Covariance matrix of the sample membership indicators is defined as -\eqn{\Delta_{kl}=\pi_{kl}-\pi_k\pi_l} +\description{ +Computes the matrix \eqn{\Delta_{kl} = \pi_{kl} - \pi_k \pi_l} for all +pairs of units in a finite population. This matrix appears in the exact +Horvitz-Thompson variance formula. } -\value{The function returns a symmetric matrix of size \eqn{N \times N} containing the variances-covariances among the sample membership indicators for each pair of units in the finite population.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\details{ +The matrix \eqn{\Delta} is central to the Horvitz-Thompson variance +estimator: +\deqn{V(\hat{t}_{y,\pi}) = \sum_k \sum_l \Delta_{kl} \frac{y_k}{\pi_k} +\frac{y_l}{\pi_l}} +It requires computing both first-order (\code{\link{Pik}}) and +second-order (\code{\link{Pikl}}) inclusion probabilities, so it is only +feasible for small populations. } \examples{ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -# The sample size is n=2 +N <- 5 n <- 2 -# p is the probability of selection of every sample. p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -# Note that the sum of the elements of this vector is one -sum(p) -# Computation of the Variance-Covariance matrix of the sample membership indicators -Deltakl(N, n, p) +Delta <- Deltakl(N, n, p) +Delta +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Pik}}, \code{\link{Pikl}}, \code{\link{VarHT}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/Domains.rd b/man/Domains.rd index 8a3f870..f1c7555 100644 --- a/man/Domains.rd +++ b/man/Domains.rd @@ -1,67 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Domains.r \name{Domains} \alias{Domains} -\title{Domains Indicator Matrix} -\description{Creates a matrix of domain indicator variables for every single unit in the selected sample or in the entire population} +\title{Domain Indicator Matrix} \usage{ Domains(y) } \arguments{ -\item{y}{Vector of the domain of interest containing the membership of each unit to a specified category of the domain} +\item{y}{A vector (factor or coercible to factor) identifying the domain +membership of each unit in the sample.} } -\seealso{ -\code{\link{E.SI}} +\value{ +A binary matrix of dimension \code{n x D}, where \code{D} is the number +of domains (levels of \code{y}). Entry \eqn{(k, d) = 1} if unit \eqn{k} +belongs to domain \eqn{d}, and 0 otherwise. Column names are the domain +labels. } -\details{Each value of y represents the domain which a specified unit belongs} -\value{The function returns a \eqn{n\times p} matrix, where \eqn{n} is the number of units in the selected -sample and \eqn{p} is the number of categories of the domain of interest. The values of this matrix are zero, if the unit does not -belongs to a specified category and one, otherwise.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Creates a binary indicator matrix that identifies the domain membership +of each unit in the sample. Each column corresponds to one domain +(level of \code{y}) and each row to one unit. +} +\details{ +This function is useful for domain estimation, where population totals or +means must be estimated for subgroups of the population. The indicator +matrix can be multiplied element-wise with the variable of interest to +restrict estimation to each domain. } \examples{ -############ -## Example 1 -############ -# This domain contains only two categories: "yes" and "no" -x <- as.factor(c("yes","yes","yes","no","no","no","no","yes","yes")) -Domains(x) - -############ -## Example 2 -############ -# Uses the Lucy data to draw a random sample of units according -# to a SI design -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- sample(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variable SPAM is a domain of interest -Doma <- Domains(SPAM) -Doma -# HT estimation of the absolute domain size for every category in the domain -# of interest -E.SI(N,n,Doma) - -############ -## Example 3 -############ -# Following with Example 2... -# The variables of interest are: Income, Employees and Taxes -# This function allows to estimate the population total of this variables for every -# category in the domain of interest SPAM -estima <- data.frame(Income, Employees, Taxes) -SPAM.no <- estima*Doma[,1] -SPAM.yes <- estima*Doma[,2] -E.SI(N,n,SPAM.no) -E.SI(N,n,SPAM.yes) +N <- nrow(Lucy) +n <- 400 +sam <- S.SI(N, n) +# Level has 3 domains: Small, Medium, Big +dom <- Domains(Level[sam]) +head(dom) +colSums(dom) # sample sizes per domain +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.SI}}, \code{\link{E.STSI}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/E.2SI.rd b/man/E.2SI.rd index 363d861..fee003a 100644 --- a/man/E.2SI.rd +++ b/man/E.2SI.rd @@ -1,143 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.2SI.r \name{E.2SI} \alias{E.2SI} -\title{Estimation of the Population Total under Two Stage Simple Random Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to a 2SI sampling design} +\title{Estimation of the Population Total under Two Stage Simple Random Sampling} \usage{ E.2SI(NI, nI, Ni, ni, y, PSU) } \arguments{ -\item{NI}{Population size of Primary Sampling Units} -\item{nI}{Sample size of Primary Sampling Units} -\item{Ni}{Vector of population sizes of Secundary Sampling Units selected in the first draw} -\item{ni}{Vector of sample sizes of Secundary Sampling Units} -\item{y}{Vector, matrix or data frame containig the recollected information of the variables of interest for every -unit in the selected sample} -\item{PSU}{Vector identifying the membership to the strata of each unit in the population} +\item{NI}{Population size of Primary Sampling Units (PSUs).} + +\item{nI}{Sample size of Primary Sampling Units (PSUs).} + +\item{Ni}{Vector of population sizes of Secondary Sampling Units within +each selected PSU.} + +\item{ni}{Vector of sample sizes of Secondary Sampling Units within +each selected PSU.} + +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} + +\item{PSU}{Vector identifying the PSU membership of each unit in the sample.} } -\seealso{ -\code{\link{S.SI}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Dise?o de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +two-stage simple random sampling without replacement design, where both +Primary Sampling Units (PSUs) and Secondary Sampling Units (SSUs) are +selected by simple random sampling without replacement. +} +\details{ +The variance estimator decomposes into two components: the between-PSU +component and the within-PSU component, following the classical two-stage +variance decomposition of Sarndal et al. (1992). } \examples{ -############ -## Example 1 -############ -# Uses Lucy data to draw a twostage simple random sample -# accordind to a 2SI design. Zone is the clustering variable -data(Lucy) -attach(Lucy) -summary(Zone) -# The population of clusters or Primary Sampling Units -UI<-c("A","B","C","D","E") -NI <- length(UI) -# The sample size is nI=3 -nI <- 3 -# Selects the sample of PSUs -samI<-S.SI(NI,nI) -dataI<-UI[samI] -dataI -# The sampling frame of Secondary Sampling Unit is saved in Lucy1 ... Lucy3 -Lucy1<-Lucy[which(Zone==dataI[1]),] -Lucy2<-Lucy[which(Zone==dataI[2]),] -Lucy3<-Lucy[which(Zone==dataI[3]),] -# The size of every single PSU -N1<-dim(Lucy1)[1] -N2<-dim(Lucy2)[1] -N3<-dim(Lucy3)[1] -Ni<-c(N1,N2,N3) -# The sample size in every PSI is 135 Secondary Sampling Units -n1<-135 -n2<-135 -n3<-135 -ni<-c(n1,n2,n3) -# Selects a sample of Secondary Sampling Units inside the PSUs -sam1<-S.SI(N1,n1) -sam2<-S.SI(N2,n2) -sam3<-S.SI(N3,n3) -# The information about each Secondary Sampling Unit in the PSUs -# is saved in data1 ... data3 -data1<-Lucy1[sam1,] -data2<-Lucy2[sam2,] -data3<-Lucy3[sam3,] -# The information about each unit in the final selected sample is saved in data -data<-rbind(data1, data2, data3) -attach(data) -# The clustering variable is Zone -Cluster <- as.factor(as.integer(Zone)) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -# Estimation of the Population total -E.2SI(NI,nI,Ni,ni,estima,Cluster) +library(TeachingSampling) +data('BigCity') +library(dplyr) +Households <- BigCity \%>\% + group_by(HHID) \%>\% + summarise(PSU = unique(PSU), + Persons = n(), + Income = sum(Income), + Expenditure = sum(Expenditure)) -######################################################## -## Example 2 Total Census to the entire population -######################################################## -# Uses Lucy data to draw a cluster random sample -# accordind to a SI design ... -# Zone is the clustering variable -data(Lucy) -attach(Lucy) -summary(Zone) -# The population of clusters -UI<-c("A","B","C","D","E") +UI <- levels(as.factor(Households$PSU)) NI <- length(UI) -# The sample size equals to the population size of PSU -nI <- NI -# Selects every single PSU -samI<-S.SI(NI,nI) -dataI<-UI[samI] -dataI -# The sampling frame of Secondary Sampling Unit is saved in Lucy1 ... Lucy5 -Lucy1<-Lucy[which(Zone==dataI[1]),] -Lucy2<-Lucy[which(Zone==dataI[2]),] -Lucy3<-Lucy[which(Zone==dataI[3]),] -Lucy4<-Lucy[which(Zone==dataI[4]),] -Lucy5<-Lucy[which(Zone==dataI[5]),] -# The size of every single PSU -N1<-dim(Lucy1)[1] -N2<-dim(Lucy2)[1] -N3<-dim(Lucy3)[1] -N4<-dim(Lucy4)[1] -N5<-dim(Lucy5)[1] -Ni<-c(N1,N2,N3,N4,N5) -# The sample size of Secondary Sampling Units equals to the size of each PSU -n1<-N1 -n2<-N2 -n3<-N3 -n4<-N4 -n5<-N5 -ni<-c(n1,n2,n3,n4,n5) -# Selects every single Secondary Sampling Unit inside the PSU -sam1<-S.SI(N1,n1) -sam2<-S.SI(N2,n2) -sam3<-S.SI(N3,n3) -sam4<-S.SI(N4,n4) -sam5<-S.SI(N5,n5) -# The information about each unit in the cluster is saved in Lucy1 ... Lucy5 -data1<-Lucy1[sam1,] -data2<-Lucy2[sam2,] -data3<-Lucy3[sam3,] -data4<-Lucy4[sam4,] -data5<-Lucy5[sam5,] -# The information about each Secondary Sampling Unit -# in the sample (census) is saved in data -data<-rbind(data1, data2, data3, data4, data5) -attach(data) -# The clustering variable is Zone -Cluster <- as.factor(as.integer(Zone)) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -# Estimation of the Population total -E.2SI(NI,nI,Ni,ni,estima,Cluster) -# Sampling error is null +nI <- 10 +samI <- S.SI(NI, nI) +sampleI <- UI[samI] +CityI <- Households[Households$PSU \%in\% sampleI, ] + +Ni <- as.numeric(table(CityI$PSU)) +ni <- ceiling(Ni * 0.2) + +estima <- data.frame(CityI$Persons, CityI$Income, CityI$Expenditure) +area <- as.factor(CityI$PSU) + +E.2SI(NI, nI, Ni, ni, estima, area) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.1SI}}, \code{\link{E.UC}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/E.BE.rd b/man/E.BE.rd index ac0ca84..9719e96 100644 --- a/man/E.BE.rd +++ b/man/E.BE.rd @@ -1,41 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.BE.r \name{E.BE} \alias{E.BE} -\title{Estimation of the Population Total under Bernoulli Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to a BE sampling design} +\title{Estimation of the Population Total under Bernoulli Sampling} \usage{ E.BE(y, prob) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every unit in the selected sample} -\item{prob}{Inclusion probability for each unit in the population} +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} + +\item{prob}{Scalar. The (constant) inclusion probability used in the +Bernoulli sampling design. Must satisfy \code{0 < prob <= 1}.} } -\seealso{ -\code{\link{S.BE}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation under an BE sampling design} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +Bernoulli sampling design, where each unit in the population is independently +selected with the same inclusion probability. +} +\details{ +Under Bernoulli sampling, the sample size is random. The inclusion +probability is constant and equal to \code{prob} for all units. The +variance estimator accounts for the randomness of the sample size. } \examples{ -# Uses the Lucy data to draw a Bernoulli sample -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -n=400 -prob=n/N -sam <- S.BE(N,prob) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.BE(estima,prob) -} -\keyword{survey} \ No newline at end of file +N <- nrow(Lucy) +prob <- 0.1 +sam <- S.BE(N, prob) +sam <- sam[sam != 0] +y <- data.frame(Income = Income[sam], Employees = Employees[sam]) +E.BE(y, prob) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.BE}}, \code{\link{E.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.Beta.rd b/man/E.Beta.rd index 9ac937f..4077753 100644 --- a/man/E.Beta.rd +++ b/man/E.Beta.rd @@ -1,111 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.Beta.r \name{E.Beta} \alias{E.Beta} -\title{Estimation of the population regression coefficients under SI designs} -\description{Computes the estimation of regression coefficients using the principles of the Horvitz-Thompson estimator} +\title{Estimation of Regression Coefficients under Simple Random Sampling} \usage{ -E.Beta(N, n, y, x, ck=1, b0=FALSE) +E.Beta(N, n, y, x, ck = 1, b0 = FALSE) } \arguments{ -\item{N}{The population size} -\item{n}{The sample size} -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every unit in the selected sample} -\item{x}{Vector, matrix or data frame containing the recollected auxiliary information for every unit in the selected sample} -\item{ck}{By default equals to one. It is a vector of weights induced by the structure of variance of the supposed model} -\item{b0}{By default FALSE. The intercept of the regression model} -} -\seealso{ -\code{\link{GREG.SI}} -} -\details{Returns the estimation of the population regression coefficients in a supposed linear model, its estimated variance and its estimated coefficient of variation under an SI sampling design} -\value{The function returns a vector whose entries correspond to the estimated parameters of the regression coefficients} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -###################################################################### -## Example 1: Linear models involving continuous auxiliary information -###################################################################### - -# Draws a simple random sample without replacement -data(Lucy) -attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- S.SI(N, n) -# The information about the units in the sample -# is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) - -########### common mean model - -estima<-data.frame(Income, Employees, Taxes) -x <- rep(1,n) -E.Beta(N, n, estima,x,ck=1,b0=FALSE) - - -########### common ratio model - -estima<-data.frame(Income) -x <- data.frame(Employees) -E.Beta(N, n, estima,x,ck=x,b0=FALSE) - -########### Simple regression model without intercept - -estima<-data.frame(Income, Employees) -x <- data.frame(Taxes) -E.Beta(N, n, estima,x,ck=1,b0=FALSE) - -########### Multiple regression model without intercept +\item{N}{Population size.} -estima<-data.frame(Income) -x <- data.frame(Employees, Taxes) -E.Beta(N, n, estima,x,ck=1,b0=FALSE) +\item{n}{Sample size.} -########### Simple regression model with intercept +\item{y}{Vector, matrix or data frame of variables of interest (response).} -estima<-data.frame(Income, Employees) -x <- data.frame(Taxes) -E.Beta(N, n, estima,x,ck=1,b0=TRUE) +\item{x}{Vector, matrix or data frame of auxiliary variables (predictors).} -########### Multiple regression model with intercept +\item{ck}{Optional variance-stabilising constant. Default is \code{1} +(homoscedastic model).} -estima<-data.frame(Income) -x <- data.frame(Employees, Taxes) -E.Beta(N, n, estima,x,ck=1,b0=TRUE) - -############################################################### -## Example 2: Linear models with discrete auxiliary information -############################################################### - -# Draws a simple random sample without replacement -data(Lucy) +\item{b0}{Logical. If \code{TRUE}, an intercept column of ones is +prepended to \code{x}. Default is \code{FALSE}.} +} +\value{ +A three-dimensional array with dimensions \code{[3, P, Q]}, where +\code{P} is the number of auxiliary variables and \code{Q} is the number +of variables of interest. The three rows correspond to: +\itemize{ + \item \code{Beta estimation}: Estimated regression coefficient. + \item \code{Standard Error}: Estimated standard error. + \item \code{CVE}: Estimated coefficient of variation (in percentage). +} +} +\description{ +Computes the weighted least squares estimator of regression coefficients +for a finite population under simple random sampling without replacement. +Both the estimated coefficients and their estimated standard errors are +returned. +} +\details{ +The estimator uses a working model with weights \eqn{V = 1/(\pi_k c_k)}, +where \eqn{\pi_k = n/N} under simple random sampling and \eqn{c_k} is an +optional variance-stabilising constant. The variance is estimated using +the residual-based sandwich approach of Sarndal et al. (1992). +} +\examples{ +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] +N <- nrow(Lucy) n <- 400 -sam <- S.SI(N,n) -# The information about the sample units is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The auxiliary information -Doma<-Domains(Level) - -########### Poststratified common mean model - -estima<-data.frame(Income, Employees, Taxes) -E.Beta(N, n, estima,Doma,ck=1,b0=FALSE) - -########### Poststratified common ratio model - -estima<-data.frame(Income, Employees) -x<-Doma*Taxes -E.Beta(N, n, estima,x,ck=1,b0=FALSE) +sam <- S.SI(N, n) +y <- data.frame(Income = Income[sam]) +x <- data.frame(Employees = Employees[sam]) +E.Beta(N, n, y, x, b0 = TRUE) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{GREG.SI}}, \code{\link{E.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/E.PO.rd b/man/E.PO.rd index 5fc282c..8916a4b 100644 --- a/man/E.PO.rd +++ b/man/E.PO.rd @@ -1,45 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.PO.r \name{E.PO} \alias{E.PO} -\title{Estimation of the Population Total under Poisson Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to a PO sampling design} +\title{Estimation of the Population Total under Poisson Sampling} \usage{ E.PO(y, Pik) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every unit in the selected sample} -\item{Pik}{Vector of inclusion probabilities for each unit in the selected sample} +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} + +\item{Pik}{Vector of first-order inclusion probabilities for each unit +in the sample.} } -\seealso{ -\code{\link{S.PO}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation under a PO sampling design} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +Poisson sampling design, where each unit is independently selected with +its own inclusion probability. +} +\details{ +Under Poisson sampling, units are selected independently, so the exact +variance of the Horvitz-Thompson estimator has a simple closed form: +\eqn{V(\hat{t}) = \sum_k (1 - \pi_k)(y_k/\pi_k)^2}. } \examples{ -# Uses the Lucy data to draw a Poisson sample -data(Lucy) +data('Lucy') attach(Lucy) -N <- dim(Lucy)[1] -# The population size is 2396. The expected sample size is 400 -# The inclusion probability is proportional to the variable Income +N <- nrow(Lucy) n <- 400 -Pik<-n*Income/sum(Income) -# The selected sample -sam <- S.PO(N,Pik) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The inclusion probabilities of each unit in the selected smaple -inclusion <- Pik[sam] -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.PO(estima,inclusion) -} -\keyword{survey} \ No newline at end of file +Pik <- PikPPS(n, Employees) +sam <- S.PO(N, Pik) +sam <- sam[sam != 0] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.PO(y, Pik[sam]) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.PO}}, \code{\link{E.piPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.PPS.rd b/man/E.PPS.rd index 83ca3ae..6cf8992 100644 --- a/man/E.PPS.rd +++ b/man/E.PPS.rd @@ -1,44 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.PPS.r \name{E.PPS} \alias{E.PPS} -\title{Estimation of the Population Total under Probability Proportional to Size Sampling With Replacement} -\description{Computes the Hansen-Hurwitz estimator of the population total according to a probability proportional to size sampling with replacement design} +\title{Estimation of the Population Total under PPS With-Replacement Sampling} \usage{ E.PPS(y, pk) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every unit in the selected sample} -\item{pk}{A vector containing selection probabilities for each unit in the sample} +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every selected unit (with possible repetitions).} + +\item{pk}{Vector of selection probabilities for each draw in the sample.} } -\seealso{ -\code{\link{S.PPS}, \link{HH}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation estimated under a probability proportional to size sampling with replacement design} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Hansen-Hurwitz estimator of the population total under a +probability proportional to size with-replacement (PPS-WR) sampling design. +} +\details{ +The Hansen-Hurwitz estimator is \eqn{\hat{t} = (1/m)\sum_{i=1}^m y_i/p_i}, +where \eqn{p_i} is the selection probability of the \eqn{i}-th draw and +\eqn{m} is the number of draws. } \examples{ -# Uses the Lucy data to draw a random sample according to a -# PPS with replacement design -data(Lucy) +data('Lucy') attach(Lucy) -# The selection probability of each unit is proportional to the variable Income -m <- 400 -res <- S.PPS(m,Income) -# The selected sample -sam <- res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# pk.s is the selection probability of each unit in the selected sample -pk.s <- res[,2] -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.PPS(estima,pk.s) -} -\keyword{survey} \ No newline at end of file +m <- 400 +res <- S.PPS(m, Employees) +sam <- res[, 1] +pk <- res[, 2] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.PPS(y, pk) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.PPS}}, \code{\link{HH}}, \code{\link{E.piPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.Quantile.rd b/man/E.Quantile.rd index 047bf25..c6d1209 100644 --- a/man/E.Quantile.rd +++ b/man/E.Quantile.rd @@ -1,78 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.Quantile.r \name{E.Quantile} \alias{E.Quantile} -\title{Estimation of a Population quantile} -\description{Computes the estimation of a population quantile using the principles of the Horvitz-Thompson estimator} +\title{Estimation of Population Quantiles} \usage{ E.Quantile(y, Qn, Pik) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{Qn}{Quantile of interest} -\item{Pik}{A vector containing inclusion probabilities for each unit in the sample. -If missing, the function will assign the same weights to each unit in the sample} +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} + +\item{Qn}{Scalar in \eqn{(0, 1)}. The desired quantile level +(e.g. \code{0.5} for the median, \code{0.25} for the first quartile).} + +\item{Pik}{Optional vector of first-order inclusion probabilities. If +omitted, equal probabilities are assumed.} } -\seealso{ -\code{\link{HT}} +\value{ +A numeric vector of length equal to the number of variables in \code{y}, +containing the estimated quantile for each variable. } -\details{Returns the estimation of the population quantile of every single variable of interest} -\value{The function returns a vector whose entries correspond to the estimated quantiles of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Computes a weighted quantile estimator for finite populations. When +inclusion probabilities are provided, the estimator uses the +Horvitz-Thompson weights \eqn{d_k = 1/\pi_k}; otherwise, equal weights +are assumed (simple random sampling). +} +\details{ +The estimator is based on the weighted empirical cumulative distribution +function. For each variable, units are sorted by their observed value, +cumulative weights are computed, and the quantile is located by +interpolation. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vectors y and x give the values of the variables of interest -y<-c(32, 34, 46, 89, 35) -x<-c(52, 60, 75, 100, 50) -z<-cbind(y,x) -# Inclusion probabilities for a design of size n=2 -Pik<-c(0.58, 0.34, 0.48, 0.33, 0.27) -# Estimation of the sample median -E.Quantile(y, 0.5) -# Estimation of the sample Q1 -E.Quantile(x, 0.25) -# Estimation of the sample Q3 -E.Quantile(z, 0.75) -# Estimation of the sample median -E.Quantile(z, 0.5, Pik) - -############ -## Example 2 -############ -# Uses the Lucy data to draw a PPS sample with replacement - -data(Lucy) +data('Lucy') attach(Lucy) +N <- nrow(Lucy) +n <- 400 +sam <- S.SI(N, n) +Pik <- rep(n/N, n) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -# The selection probability of each unit is proportional to the variable Income -# The sample size is m=400 -m=400 -res <- S.PPS(m,Income) -# The selected sample -sam <- res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -# The vector of selection probabilities of units in the sample -pk.s <- res[,2] -# The vector of inclusion probabilities of units in the sample -Pik.s<-1-(1-pk.s)^m -# The information about the sample units is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -# Estimation of sample median -E.Quantile(estima,0.5,Pik.s) +# Median +E.Quantile(y, Qn = 0.5, Pik = Pik) +# First quartile +E.Quantile(y, Qn = 0.25, Pik = Pik) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.SI}}, \code{\link{E.piPS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/E.SI.rd b/man/E.SI.rd index 774baa0..b123ae8 100644 --- a/man/E.SI.rd +++ b/man/E.SI.rd @@ -1,112 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.SI.r \name{E.SI} \alias{E.SI} -\title{Estimation of the Population Total under Simple Random Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to an SI sampling design} +\title{Estimation of the Population Total under Simple Random Sampling Without +Replacement} \usage{ E.SI(N, n, y) } \arguments{ -\item{N}{Population size} -\item{n}{Sample size} -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} +\item{N}{Population size.} + +\item{n}{Sample size.} + +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} } -\seealso{ -\code{\link{S.SI}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect (always 1 under SI, included for + consistency with other estimators). } -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its -estimated coefficient of variation under an SI sampling design} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +simple random sampling without replacement (SI) design. +} +\details{ +Under simple random sampling without replacement, the Horvitz-Thompson +estimator reduces to \eqn{\hat{t}_y = N \bar{y}_s}, the expansion +estimator. The design effect is always 1 because SI is the reference design. } \examples{ -############ -## Example 1 -############ -# Uses the Lucy data to draw a random sample of units according to a SI design -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.SI(N,n,estima) - -############ -## Example 2 -############ -# Following with Example 1. The variable SPAM is a domain of interest -Doma <- Domains(SPAM) -# This function allows to estimate the size of each domain in SPAM -estima <- data.frame(Doma) -E.SI(N,n,Doma) - -############ -## Example 3 -############ -# Following with Example 1. The variable SPAM is a domain of interest -Doma <- Domains(SPAM) -# This function allows to estimate the parameters of the variables of interest -# for every category in the domain SPAM -estima <- data.frame(Income, Employees, Taxes) -SPAM.no <- cbind(Doma[,1], estima*Doma[,1]) -SPAM.yes <- cbind(Doma[,1], estima*Doma[,2]) -# Before running the following lines, notice that: -# The first column always indicates the population size -# The second column is an estimate of the size of the category in the domain SPAM -# The remaining columns estimates the parameters of interest -# within the corresponding category in the domain SPAM -E.SI(N,n,SPAM.no) -E.SI(N,n,SPAM.yes) - -############ -## Example 4 -############ -# Following with Example 1. The variable SPAM is a domain of interest -# and the variable ISO is a populational subgroup of interest -Doma <- Domains(SPAM) -estima <- Domains(Zone) -# Before running the following lines, notice that: -# The first column indicates wheter the unit -# belongs to the first category of SPAM or not -# The remaining columns indicates wheter the unit -# belogns to the categories of Zone -SPAM.no <- data.frame(SpamNO=Doma[,1], Zones=estima*Doma[,1]) -# Before running the following lines, notice that: -# The first column indicates wheter the unit -# belongs to the second category of SPAM or not -# The remaining columns indicates wheter the unit -# belogns to the categories of Zone -SPAM.yes <- data.frame(SpamYES=Doma[,2], Zones=estima*Doma[,2]) -# Before running the following lines, notice that: -# The first column always indicates the population size -# The second column is an estimate of the size of the -# first category in the domain SPAM -# The remaining columns estimates the size of the categories -# of Zone within the corresponding category of SPAM -# Finnaly, note that the sum of the point estimates of the last -# two columns gives exactly the point estimate in the second column -E.SI(N,n,SPAM.no) -# Before running the following lines, notice that: -# The first column always indicates the population size -# The second column is an estimate of the size of the -# second category in the domain SPAM -# The remaining columns estimates the size of the categories -# of Zone within the corresponding category of SPAM -# Finnaly, note that the sum of the point estimates of the last two -# columns gives exactly the point estimate in the second column -E.SI(N,n,SPAM.yes) - +N <- nrow(Lucy) +n <- 400 +sam <- S.SI(N, n) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.SI(N, n, y) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.SI}}, \code{\link{E.STSI}}, \code{\link{GREG.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/E.STPPS.rd b/man/E.STPPS.rd deleted file mode 100644 index c205bdf..0000000 --- a/man/E.STPPS.rd +++ /dev/null @@ -1,57 +0,0 @@ -\name{E.STPPS} -\alias{E.STPPS} -\title{Estimation of the Population Total under Stratified Probability Proportional to Size Sampling With Replacement} -\description{Computes the Hansen-Hurwitz estimator of the population total according to a probability proportional to size -sampling with replacement design} -\usage{ -E.STPPS(y, pk, mh, S) -} -\arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{pk}{A vector containing selection probabilities for each unit in the sample} -\item{mh}{Vector of sample size in each stratum} -\item{S}{Vector identifying the membership to the strata of each unit in selected sample} -} -\seealso{ -\code{\link{S.STPPS}} -} -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation in all of the stratum and finally in the entire population} -\value{The function returns an array composed by several matrices representing each variable of interest. The columns of each matrix -correspond to the estimated parameters of the variables of interest in each stratum and in the entire population} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -# Uses the Lucy data to draw a stratified random sample -# according to a PPS design in each stratum - -data(Lucy) -attach(Lucy) -# Level is the stratifying variable -summary(Level) -# Defines the sample size at each stratum -m1<-83 -m2<-100 -m3<-200 -mh<-c(m1,m2,m3) -# Draws a stratified sample -res<-S.STPPS(Level, Income, mh) -# The selected sample -sam<-res[,1] -# The selection probability of each unit in the selected sample -pk <- res[,2] -pk -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.STPPS(estima,pk,mh,Level) -} -\keyword{survey} \ No newline at end of file diff --git a/man/E.STSI.rd b/man/E.STSI.rd deleted file mode 100644 index be94d78..0000000 --- a/man/E.STSI.rd +++ /dev/null @@ -1,73 +0,0 @@ -\name{E.STSI} -\alias{E.STSI} -\title{Estimation of the Population Total under Stratified Simple Random Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to a STSI sampling design} -\usage{ -E.STSI(S, Nh, nh, y) -} -\arguments{ -\item{S}{Vector identifying the membership to the strata of each unit in the population} -\item{Nh}{Vector of stratum sizes} -\item{nh}{Vector of sample sizes in each stratum} -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -} -\seealso{ -\code{\link{S.STSI}} -} -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation in all of the strata and finally in the entire population} -\value{The function returns an array composed by several matrices representing each variable of interest. The columns of each matrix -correspond to the estimated parameters of the variables of interest in each stratum and in the entire population} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -############ -## Example 1 -############ -# Uses the Lucy data to draw a stratified random sample -# according to a SI design in each stratum - -data(Lucy) -attach(Lucy) -# Level is the stratifying variable -summary(Level) -# Defines the size of each stratum -N1<-summary(Level)[[1]] -N2<-summary(Level)[[2]] -N3<-summary(Level)[[3]] -N1;N2;N3 -Nh <- c(N1,N2,N3) -# Defines the sample size at each stratum -n1<-N1 -n2<-100 -n3<-200 -nh<-c(n1,n2,n3) -# Draws a stratified sample -sam <- S.STSI(Level, Nh, nh) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.STSI(Level,Nh,nh,estima) - -############ -## Example 2 -############ -# Following with Example 1. The variable SPAM is a domain of interest -Doma <- Domains(SPAM) -# This function allows to estimate the parameters of the variables of interest -# for every category in the domain SPAM -SPAM.no <- estima*Doma[,1] -SPAM.yes <- estima*Doma[,2] -E.STSI(Level, Nh, nh, Doma) -E.STSI(Level, Nh, nh, SPAM.no) -E.STSI(Level, Nh, nh, SPAM.yes) -} -\keyword{survey} \ No newline at end of file diff --git a/man/E.STpiPS.Rd b/man/E.STpiPS.Rd deleted file mode 100644 index 1161fa9..0000000 --- a/man/E.STpiPS.Rd +++ /dev/null @@ -1,62 +0,0 @@ -\name{E.STpiPS} -\alias{E.STpiPS} -\title{Estimation of the Population Total under Stratified Probability Proportional to Size Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to a probability proportional to size -sampling without replacement design in each stratum} -\usage{ -E.STpiPS(y, pik, S) -} -\arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{pik}{A vector containing inclusion probabilities for each unit in the sample} -\item{S}{Vector identifying the membership to the strata of each unit in selected sample} -} -\seealso{ -\code{\link{S.STpiPS}} -} -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error, its estimated coefficient of variation and its corresponding DEFF in all of the strata and finally in the entire population} -\value{The function returns an array composed by several matrices representing each variable of interest. The columns of each matrix -correspond to the estimated parameters of the variables of interest in each stratum and in the entire population} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -# Uses the Lucy data to draw a stratified random sample -# according to a PPS design in each stratum - -data(Lucy) -attach(Lucy) -# Level is the stratifying variable -summary(Level) - -# Defines the size of each stratum -N1<-summary(Level)[[1]] -N2<-summary(Level)[[2]] -N3<-summary(Level)[[3]] -N1;N2;N3 - -# Defines the sample size at each stratum -n1<-N1 -n2<-100 -n3<-200 -nh<-c(n1,n2,n3) -nh -# Draws a stratified sample -S <- Level -x <- Employees - -res <- S.STpiPS(S, x, nh) -sam <- res[,1] -pik <- res[,2] - -data <- Lucy[sam,] -attach(data) - -estima <- data.frame(Income, Employees, Taxes) -E.STpiPS(estima,pik,Level) -} -\keyword{survey} \ No newline at end of file diff --git a/man/E.SY.rd b/man/E.SY.rd index de27043..fc2e6f7 100644 --- a/man/E.SY.rd +++ b/man/E.SY.rd @@ -1,43 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.SY.r \name{E.SY} \alias{E.SY} -\title{Estimation of the Population Total under Systematic Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to an SY sampling design} +\title{Estimation of the Population Total under Systematic Sampling} \usage{ E.SY(N, a, y) } \arguments{ -\item{N}{Population size} -\item{a}{Number of groups dividing the population} -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} +\item{N}{Population size.} + +\item{a}{Sampling interval (skip). The expected sample size is \code{N/a}.} + +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} } -\seealso{ -\code{\link{S.SY}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated standard error and its estimated coefficient of variation under an SY sampling design} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +systematic sampling design with sampling interval \code{a}. +} +\details{ +Under systematic sampling the sample size is \eqn{n = N/a}. Because only +one systematic sample is observed, the variance cannot be estimated without +assumptions. Here, the variance is approximated by treating the systematic +sample as a simple random sample of the same size, which is a common +conservative approximation. } \examples{ -# Uses the Lucy data to draw a Systematic sample -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -# The population is divided in 6 groups -# The selected sample -sam <- S.SY(N,6) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.SY(N,6,estima) -} -\keyword{survey} \ No newline at end of file +N <- nrow(Lucy) +a <- 10 +sam <- S.SY(N, a) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.SY(N, a, y) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.SY}}, \code{\link{E.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.WR.rd b/man/E.WR.rd index c3ce6e4..1970d78 100644 --- a/man/E.WR.rd +++ b/man/E.WR.rd @@ -1,44 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.WR.r \name{E.WR} \alias{E.WR} -\title{Estimation of the Population Total under Simple Random Sampling With Replacement} -\description{Computes the Hansen-Hurwitz estimator of the population total according to a simple random -sampling with replacement design} +\title{Estimation of the Population Total under Simple Random Sampling With +Replacement} \usage{ E.WR(N, m, y) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} +\item{N}{Population size.} + +\item{m}{Number of draws (sample size with replacement).} + +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every draw in the sample (repetitions allowed).} } -\seealso{ -\code{\link{S.WR}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling + without replacement. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated variance and its -estimated coefficient of variation estimated under an simple random with replacement design} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Hansen-Hurwitz estimator of the population total under a +simple random sampling with replacement (WR) design. +} +\details{ +Under simple random sampling with replacement with \code{m} draws, the +Hansen-Hurwitz estimator is \eqn{\hat{t} = (N/m)\sum_{i=1}^m y_i}. } \examples{ -# Uses the Lucy data to draw a random sample according to a WR design -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -m <- 400 -sam <- S.WR(N,m) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.WR(N,m,estima) -} -\keyword{survey} \ No newline at end of file +N <- nrow(Lucy) +m <- 400 +sam <- S.WR(N, m) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.WR(N, m, y) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.WR}}, \code{\link{E.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.piPS.rd b/man/E.piPS.rd index bba91a6..72587d0 100644 --- a/man/E.piPS.rd +++ b/man/E.piPS.rd @@ -1,50 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.piPS.r \name{E.piPS} \alias{E.piPS} -\title{Estimation of the Population Total under Probability Proportional to Size Sampling Without Replacement} -\description{Computes the Horvitz-Thompson estimator of the population total according to a \eqn{\pi}PS sampling design} +\title{Estimation of the Population Total under Pi Probability Proportional to +Size Sampling} \usage{ E.piPS(y, Pik) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{Pik}{Vector of inclusion probabilities for each unit in the selected sample} +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the selected sample.} + +\item{Pik}{Vector of first-order inclusion probabilities for each +unit in the sample.} } -\seealso{ -\code{\link{S.piPS}} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error of the total. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. } -\details{Returns the estimation of the population total of every single variable of interest, its estimated variance and its estimated coefficient of variation under a \eqn{\pi}PPS sampling design. This function uses the results of approximate expressions for -the estimated variance of the Horvitz-Thompson estimator} -\value{The function returns a data matrix whose columns correspond to the estimated parameters of the variables of interest} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Matei, A. and Tille, Y. (2005), Evaluation of Variance Approximations and Estimators in Maximun -Entropy Sampling with Unequal Probability and Fixed Sample Design. \emph{Journal of Official Statistics}. Vol 21, 4, 543-570.\cr -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +without-replacement probability proportional to size (piPS) sampling design. +The variance is estimated using the Horvitz-Thompson variance approximation +based on first-order inclusion probabilities. +} +\details{ +When all inclusion probabilities are equal (i.e. \code{sum(Pik) == n}), +the variance is set to zero, reflecting an equal-probability design. } \examples{ -# Uses the Lucy data to draw a sample according to a piPS -# without replacement design -data(Lucy) +data('Lucy') attach(Lucy) -# The inclusion probability of each unit is proportional to the variable Income -# The selected sample of size n=400 +N <- nrow(Lucy) n <- 400 -res <- S.piPS(n, Income) -sam <- res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# Pik.s is the inclusion probability of every single unit in the selected sample -Pik.s <- res[,2] -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.piPS(estima,Pik.s) -# Same results than HT function -HT(estima, Pik.s) -} -\keyword{survey} \ No newline at end of file +x <- Employees +res <- S.piPS(n, x) +sam <- res[, 1] +Pik <- res[, 2] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.piPS(y, Pik) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.piPS}}, \code{\link{PikPPS}}, \code{\link{E.PO}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/GREG.SI.rd b/man/GREG.SI.rd index f77b9fb..976303b 100644 --- a/man/GREG.SI.rd +++ b/man/GREG.SI.rd @@ -1,180 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GREG.SI.r \name{GREG.SI} \alias{GREG.SI} -\title{The Generalized Regression Estimator under SI sampling design} -\description{Computes the generalized regression estimator of the population total for several variables of interest under simple random sampling without replacement} +\title{Generalised Regression Estimator under Simple Random Sampling} \usage{ -GREG.SI(N, n, y, x, tx, b, b0=FALSE) +GREG.SI(N, n, y, x, tx, b, b0 = FALSE) } \arguments{ -\item{N}{The population size} -\item{n}{The sample size} -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{x}{Vector, matrix or data frame containing the recollected auxiliary information for every unit in the selected sample} -\item{tx}{Vector containing the populations totals of the auxiliary information} -\item{b}{Vector of estimated regression coefficients} -\item{b0}{By default FALSE. The intercept of the regression model} -} -\seealso{ -\code{\link{E.Beta}} -} -\value{The function returns a vector of total population estimates for each variable of interest, its estimated standard error and its estimated coefficient of variation.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -###################################################################### -## Example 1: Linear models involving continuous auxiliary information -###################################################################### - -# Draws a simple random sample without replacement -data(Lucy) -attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) - -########### common mean model - -estima<-data.frame(Income, Employees, Taxes) -x <- rep(1,n) -model <- E.Beta(N, n, estima, x, ck=1,b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- c(N) -GREG.SI(N,n,estima,x,tx, b, b0=FALSE) - -########### common ratio model - -estima<-data.frame(Income) -x <- data.frame(Employees) -model <- E.Beta(N, n, estima, x, ck=x,b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- sum(Lucy$Employees) -GREG.SI(N,n,estima,x,tx, b, b0=FALSE) - -########### Simple regression model without intercept - -estima<-data.frame(Income, Employees) -x <- data.frame(Taxes) -model <- E.Beta(N, n, estima, x, ck=1,b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- sum(Lucy$Taxes) -GREG.SI(N,n,estima,x,tx, b, b0=FALSE) - -########### Multiple regression model without intercept - -estima<-data.frame(Income) -x <- data.frame(Employees, Taxes) -model <- E.Beta(N, n, estima, x, ck=1, b0=FALSE) -b <- as.matrix(model[1,,]) -tx <- c(sum(Lucy$Employees), sum(Lucy$Taxes)) -GREG.SI(N,n,estima,x,tx, b, b0=FALSE) - -########### Simple regression model with intercept - -estima<-data.frame(Income, Employees) -x <- data.frame(Taxes) -model <- E.Beta(N, n, estima, x, ck=1,b0=TRUE) -b <- as.matrix(model[1,,]) -tx <- c(N, sum(Lucy$Taxes)) -GREG.SI(N,n,estima,x,tx, b, b0=TRUE) - -########### Multiple regression model with intercept - -estima<-data.frame(Income) -x <- data.frame(Employees, Taxes) -model <- E.Beta(N, n, estima, x, ck=1,b0=TRUE) -b <- as.matrix(model[1,,]) -tx <- c(N, sum(Lucy$Employees), sum(Lucy$Taxes)) -GREG.SI(N,n,estima,x,tx, b, b0=TRUE) - -#################################################################### -## Example 2: Linear models with discrete auxiliary information -#################################################################### - -# Draws a simple random sample without replacement -data(Lucy) +\item{N}{Population size.} -N <- dim(Lucy)[1] -n <- 400 -sam <- S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) +\item{n}{Sample size.} -# The auxiliary information is discrete type -Doma<-Domains(Level) +\item{y}{Vector, matrix or data frame of variables of interest.} -########### Poststratified common mean model +\item{x}{Vector, matrix or data frame of auxiliary variables observed +in the sample.} -estima<-data.frame(Income, Employees, Taxes) -model <- E.Beta(N, n, estima, Doma, ck=1,b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- colSums(Domains(Lucy$Level)) -GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) +\item{tx}{Vector of known population totals for the auxiliary variables.} -########### Poststratified common ratio model - -estima<-data.frame(Income, Employees) -x <- Doma*Taxes -model <- E.Beta(N, n, estima, x ,ck=1,b0=FALSE) -b <- as.matrix(model[1,,]) -tx <- colSums(Domains(Lucy$Level)*Lucy$Taxes) -GREG.SI(N,n,estima,x,tx, b, b0=FALSE) - -###################################################################### -## Example 3: Domains estimation trough the postestratified estimator -###################################################################### - -# Draws a simple random sample without replacement -data(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) - -# The auxiliary information is discrete type -Doma<-Domains(Level) - -########### Poststratified common mean model for the - # Income total in each poststratum ################### - -estima<-Doma*Income -model <- E.Beta(N, n, estima, Doma, ck=1, b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- colSums(Domains(Lucy$Level)) -GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) - -########### Poststratified common mean model for the - # Employees total in each poststratum ################### - -estima<-Doma*Employees -model <- E.Beta(N, n, estima, Doma, ck=1,b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- colSums(Domains(Lucy$Level)) -GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) - -########### Poststratified common mean model for the - # Taxes total in each poststratum ################### - -estima<-Doma*Taxes -model <- E.Beta(N, n, estima, Doma, ck=1, b0=FALSE) -b <- t(as.matrix(model[1,,])) -tx <- colSums(Domains(Lucy$Level)) -GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) +\item{b}{Matrix of regression coefficients (e.g. from \code{\link{E.Beta}}).} +\item{b0}{Logical. If \code{TRUE}, an intercept column is prepended to +\code{x}. Default is \code{FALSE}.} +} +\value{ +A matrix with three rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: GREG estimated population total. + \item \code{Standard Error}: Estimated standard error. + \item \code{CVE}: Estimated coefficient of variation (in percentage). +} +} +\description{ +Computes the Generalised Regression (GREG) estimator of the population +total under simple random sampling without replacement, using known +population totals of auxiliary variables to improve efficiency. +} +\details{ +The GREG estimator is: +\deqn{\hat{t}_{GREG} = \hat{t}_{HT} + (\mathbf{t}_x - +\hat{\mathbf{t}}_{x,HT})^T \hat{\boldsymbol{\beta}}} +where \eqn{\hat{\boldsymbol{\beta}}} are the regression coefficients +estimated from the sample, \eqn{\mathbf{t}_x} are the known population +totals, and variance is estimated from the residuals. +} +\examples{ +data('Lucy') +attach(Lucy) +N <- nrow(Lucy) +n <- 400 +sam <- S.SI(N, n) +y <- data.frame(Income = Income[sam]) +x <- data.frame(Employees = Employees[sam]) +tx <- sum(Employees) +b <- E.Beta(N, n, y, x, b0 = FALSE) +GREG.SI(N, n, y, x, tx, b) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.Beta}}, \code{\link{E.SI}}, \code{\link{Wk}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/HH.rd b/man/HH.rd deleted file mode 100644 index 57a142b..0000000 --- a/man/HH.rd +++ /dev/null @@ -1,131 +0,0 @@ -\name{HH} -\alias{HH} -\title{The Hansen-Hurwitz Estimator} -\description{Computes the Hansen-Hurwitz Estimator estimator of the population total for several variables of interest} -\usage{ -HH(y, pk) -} -\arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every unit in the selected sample} -\item{pk}{A vector containing selection probabilities for each unit in the selected sample} -} -\seealso{ -\code{\link{HT}} -} -\details{The Hansen-Hurwitz estimator is given by -\deqn{\sum_{i=1}^m\frac{y_i}{p_i}} -where \eqn{y_i} is the value of the variables of interest for the \eqn{i}th unit, and \eqn{p_i} is its corresponding -selection probability. This estimator is restricted to with replacement sampling designs. -} -\value{The function returns a vector of total population estimates for each variable of interest, its estimated standard error and its estimated coefficient of variation.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vectors y1 and y2 give the values of the variables of interest -y1<-c(32, 34, 46, 89, 35) -y2<-c(1,1,1,0,0) -y3<-cbind(y1,y2) -# The population size is N=5 -N <- length(U) -# The sample size is m=2 -m <- 2 -# pk is the probability of selection of every single unit -pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) -# Selection of a random sample with replacement -sam <- sample(5,2, replace=TRUE, prob=pk) -# The selected sample is -U[sam] -# The values of the variables of interest for the units in the sample -y1[sam] -y2[sam] -y3[sam,] -# The Hansen-Hurwitz estimator -HH(y1[sam],pk[sam]) -HH(y2[sam],pk[sam]) -HH(y3[sam,],pk[sam]) - - -############ -## Example 2 -############ -# Uses the Lucy data to draw a simple random sample with replacement -data(Lucy) -attach(Lucy) - -N <- dim(Lucy)[1] -m <- 400 -sam <- sample(N,m,replace=TRUE) -# The vector of selection probabilities of units in the sample -pk <- rep(1/N,m) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -HH(estima, pk) - -################################################################ -## Example 3 HH is unbiased for with replacement sampling designs -################################################################ - -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y<-c(32, 34, 46, 89, 35) -# The population size is N=5 -N <- length(U) -# The sample size is m=2 -m <- 2 -# pk is the probability of selection of every single unit -pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) -# p is the probability of selection of every possible sample -p <- p.WR(N,m,pk) -p -sum(p) -# The sample membership matrix for random size without replacement sampling designs -Ind <- nk(N,m) -Ind -# The support with the values of the elements -Qy <- SupportWR(N,m, ID=y) -Qy -# The support with the values of the elements -Qp <- SupportWR(N,m, ID=pk) -Qp -# The HT estimates for every single sample in the support -HH1 <- HH(Qy[1,], Qp[1,])[1,] -HH2 <- HH(Qy[2,], Qp[2,])[1,] -HH3 <- HH(Qy[3,], Qp[3,])[1,] -HH4 <- HH(Qy[4,], Qp[4,])[1,] -HH5 <- HH(Qy[5,], Qp[5,])[1,] -HH6 <- HH(Qy[6,], Qp[6,])[1,] -HH7 <- HH(Qy[7,], Qp[7,])[1,] -HH8 <- HH(Qy[8,], Qp[8,])[1,] -HH9 <- HH(Qy[9,], Qp[9,])[1,] -HH10 <- HH(Qy[10,], Qp[10,])[1,] -HH11 <- HH(Qy[11,], Qp[11,])[1,] -HH12 <- HH(Qy[12,], Qp[12,])[1,] -HH13 <- HH(Qy[13,], Qp[13,])[1,] -HH14 <- HH(Qy[14,], Qp[14,])[1,] -HH15 <- HH(Qy[15,], Qp[15,])[1,] -# The HT estimates arranged in a vector -Est <- c(HH1, HH2, HH3, HH4, HH5, HH6, HH7, HH8, HH9, HH10, HH11, HH12, HH13, -HH14, HH15) -Est -# The HT is actually desgn-unbiased -data.frame(Ind, Est, p) -sum(Est*p) -sum(y) -} - -\keyword{survey} diff --git a/man/HT.rd b/man/HT.rd index 0f84ddf..f2c9764 100644 --- a/man/HT.rd +++ b/man/HT.rd @@ -1,303 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HT.r \name{HT} \alias{HT} -\title{The Horvitz-Thompson Estimator} -\description{Computes the Horvitz-Thompson estimator of the population total for several -variables of interest} +\title{Horvitz-Thompson Estimator of the Population Total} \usage{ HT(y, Pik) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{Pik}{A vector containing the inclusion probabilities for each unit in the selected sample} +\item{y}{Vector or matrix of values of the variable(s) of interest for +units in the sample.} + +\item{Pik}{Vector of first-order inclusion probabilities for each unit +in the sample.} } -\seealso{ -\code{\link{HH}} +\value{ +A numeric vector or matrix with the estimated total for each variable +of interest. } -\details{The Horvitz-Thompson estimator is given by -\deqn{\sum_{k \in U}\frac{y_k}{{\pi}_k}} -where \eqn{y_k} is the value of the variables of interest for the \eqn{k}th unit, and \eqn{{\pi}_k} -its corresponding inclusion probability. This estimator could be used for without replacement designs -as well as for with replacement designs. +\description{ +Computes the Horvitz-Thompson (HT) estimator of the population total for +one or more variables of interest, given the sample observations and their +first-order inclusion probabilities. } -\value{The function returns a vector of total population estimates for each variable of interest.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\details{ +The Horvitz-Thompson estimator is defined as: +\deqn{\hat{t}_{y,\pi} = \sum_{k \in s} \frac{y_k}{\pi_k}} +where \eqn{\pi_k} is the first-order inclusion probability of unit \eqn{k}. +This estimator is design-unbiased for any fixed-size sampling design. } \examples{ -############ -## Example 1 -############ -# Uses the Lucy data to draw a simple random sample without replacement -data(Lucy) -attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- sample(N,n) -# The vector of inclusion probabilities for each unit in the sample -pik <- rep(n/N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -HT(estima, pik) - -############ -## Example 2 -############ -# Uses the Lucy data to draw a simple random sample with replacement -data(Lucy) - -N <- dim(Lucy)[1] -m <- 400 -sam <- sample(N,m,replace=TRUE) -# The vector of selection probabilities of units in the sample -pk <- rep(1/N,m) -# Computation of the inclusion probabilities -pik <- 1-(1-pk)^m -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -HT(estima, pik) - -############ -## Example 3 -############ -# Without replacement sampling -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y1<-c(32, 34, 46, 89, 35) -y2<-c(1,1,1,0,0) -y3<-cbind(y1,y2) -# The population size is N=5 -N <- length(U) -# The sample size is n=2 +# Population N = 5, sample size n = 2 +N <- 5 n <- 2 -# The sample membership matrix for fixed size without replacement sampling designs -Ind <- Ik(N,n) -# p is the probability of selection of every possible sample p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -# Computation of the inclusion probabilities -inclusion <- Pik(p, Ind) -# Selection of a random sample -sam <- sample(5,2) -# The selected sample -U[sam] -# The inclusion probabilities for these two units -inclusion[sam] -# The values of the variables of interest for the units in the sample -y1[sam] -y2[sam] -y3[sam,] -# The Horvitz-Thompson estimator -HT(y1[sam],inclusion[sam]) -HT(y2[sam],inclusion[sam]) -HT(y3[sam,],inclusion[sam]) - -############ -## Example 4 -############ -# Following Example 3... With replacement sampling -# The population size is N=5 -N <- length(U) -# The sample size is m=2 -m <- 2 -# pk is the probability of selection of every single unit -pk <- c(0.9, 0.025, 0.025, 0.025, 0.025) -# Computation of the inclusion probabilities -pik <- 1-(1-pk)^m -# Selection of a random sample with replacement -sam <- sample(5,2, replace=TRUE, prob=pk) -# The selected sample -U[sam] -# The inclusion probabilities for these two units -inclusion[sam] -# The values of the variables of interest for the units in the sample -y1[sam] -y2[sam] -y3[sam,] -# The Horvitz-Thompson estimator -HT(y1[sam],inclusion[sam]) -HT(y2[sam],inclusion[sam]) -HT(y3[sam,],inclusion[sam]) - -#################################################################### -## Example 5 HT is unbiased for without replacement sampling designs -## Fixed sample size -#################################################################### - -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y<-c(32, 34, 46, 89, 35) -# The population size is N=5 -N <- length(U) -# The sample size is n=2 -n <- 2 -# The sample membership matrix for fixed size without replacement sampling designs -Ind <- Ik(N,n) -Ind -# p is the probability of selection of every possible sample -p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -sum(p) -# Computation of the inclusion probabilities -inclusion <- Pik(p, Ind) -inclusion -sum(inclusion) -# The support with the values of the elements -Qy <-Support(N,n,ID=y) -Qy -# The HT estimates for every single sample in the support -HT1<- HT(y[Ind[1,]==1], inclusion[Ind[1,]==1]) -HT2<- HT(y[Ind[2,]==1], inclusion[Ind[2,]==1]) -HT3<- HT(y[Ind[3,]==1], inclusion[Ind[3,]==1]) -HT4<- HT(y[Ind[4,]==1], inclusion[Ind[4,]==1]) -HT5<- HT(y[Ind[5,]==1], inclusion[Ind[5,]==1]) -HT6<- HT(y[Ind[6,]==1], inclusion[Ind[6,]==1]) -HT7<- HT(y[Ind[7,]==1], inclusion[Ind[7,]==1]) -HT8<- HT(y[Ind[8,]==1], inclusion[Ind[8,]==1]) -HT9<- HT(y[Ind[9,]==1], inclusion[Ind[9,]==1]) -HT10<- HT(y[Ind[10,]==1], inclusion[Ind[10,]==1]) -# The HT estimates arranged in a vector -Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10) -Est -# The HT is actually desgn-unbiased -data.frame(Ind, Est, p) -sum(Est*p) -sum(y) - -#################################################################### -## Example 6 HT is unbiased for without replacement sampling designs -## Random sample size -#################################################################### - -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y<-c(32, 34, 46, 89, 35) -# The population size is N=5 -N <- length(U) -# The sample membership matrix for random size without replacement sampling designs -Ind <- IkRS(N) -Ind -# p is the probability of selection of every possible sample -p <- c(0.59049, 0.06561, 0.06561, 0.06561, 0.06561, 0.06561, 0.00729, 0.00729, - 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00081, - 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, - 0.00009, 0.00009, 0.00009, 0.00009, 0.00009, 0.00001) -sum(p) -# Computation of the inclusion probabilities -inclusion <- Pik(p, Ind) -inclusion -sum(inclusion) -# The support with the values of the elements -Qy <-SupportRS(N, ID=y) -Qy -# The HT estimates for every single sample in the support -HT1<- HT(y[Ind[1,]==1], inclusion[Ind[1,]==1]) -HT2<- HT(y[Ind[2,]==1], inclusion[Ind[2,]==1]) -HT3<- HT(y[Ind[3,]==1], inclusion[Ind[3,]==1]) -HT4<- HT(y[Ind[4,]==1], inclusion[Ind[4,]==1]) -HT5<- HT(y[Ind[5,]==1], inclusion[Ind[5,]==1]) -HT6<- HT(y[Ind[6,]==1], inclusion[Ind[6,]==1]) -HT7<- HT(y[Ind[7,]==1], inclusion[Ind[7,]==1]) -HT8<- HT(y[Ind[8,]==1], inclusion[Ind[8,]==1]) -HT9<- HT(y[Ind[9,]==1], inclusion[Ind[9,]==1]) -HT10<- HT(y[Ind[10,]==1], inclusion[Ind[10,]==1]) -HT11<- HT(y[Ind[11,]==1], inclusion[Ind[11,]==1]) -HT12<- HT(y[Ind[12,]==1], inclusion[Ind[12,]==1]) -HT13<- HT(y[Ind[13,]==1], inclusion[Ind[13,]==1]) -HT14<- HT(y[Ind[14,]==1], inclusion[Ind[14,]==1]) -HT15<- HT(y[Ind[15,]==1], inclusion[Ind[15,]==1]) -HT16<- HT(y[Ind[16,]==1], inclusion[Ind[16,]==1]) -HT17<- HT(y[Ind[17,]==1], inclusion[Ind[17,]==1]) -HT18<- HT(y[Ind[18,]==1], inclusion[Ind[18,]==1]) -HT19<- HT(y[Ind[19,]==1], inclusion[Ind[19,]==1]) -HT20<- HT(y[Ind[20,]==1], inclusion[Ind[20,]==1]) -HT21<- HT(y[Ind[21,]==1], inclusion[Ind[21,]==1]) -HT22<- HT(y[Ind[22,]==1], inclusion[Ind[22,]==1]) -HT23<- HT(y[Ind[23,]==1], inclusion[Ind[23,]==1]) -HT24<- HT(y[Ind[24,]==1], inclusion[Ind[24,]==1]) -HT25<- HT(y[Ind[25,]==1], inclusion[Ind[25,]==1]) -HT26<- HT(y[Ind[26,]==1], inclusion[Ind[26,]==1]) -HT27<- HT(y[Ind[27,]==1], inclusion[Ind[27,]==1]) -HT28<- HT(y[Ind[28,]==1], inclusion[Ind[28,]==1]) -HT29<- HT(y[Ind[29,]==1], inclusion[Ind[29,]==1]) -HT30<- HT(y[Ind[30,]==1], inclusion[Ind[30,]==1]) -HT31<- HT(y[Ind[31,]==1], inclusion[Ind[31,]==1]) -HT32<- HT(y[Ind[32,]==1], inclusion[Ind[32,]==1]) -# The HT estimates arranged in a vector -Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10, HT11, HT12, HT13, - HT14, HT15, HT16, HT17, HT18, HT19, HT20, HT21, HT22, HT23, HT24, HT25, HT26, - HT27, HT28, HT29, HT30, HT31, HT32) -Est -# The HT is actually desgn-unbiased -data.frame(Ind, Est, p) -sum(Est*p) -sum(y) - -################################################################ -## Example 7 HT is unbiased for with replacement sampling designs -################################################################ - -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y<-c(32, 34, 46, 89, 35) -# The population size is N=5 -N <- length(U) -# The sample size is m=2 -m <- 2 -# pk is the probability of selection of every single unit -pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) -# p is the probability of selection of every possible sample -p <- p.WR(N,m,pk) -p -sum(p) -# The sample membership matrix for random size without replacement sampling designs -Ind <- IkWR(N,m) -Ind -# The support with the values of the elements -Qy <- SupportWR(N,m, ID=y) -Qy -# Computation of the inclusion probabilities -pik <- 1-(1-pk)^m -pik -# The HT estimates for every single sample in the support -HT1 <- HT(y[Ind[1,]==1], pik[Ind[1,]==1]) -HT2 <- HT(y[Ind[2,]==1], pik[Ind[2,]==1]) -HT3 <- HT(y[Ind[3,]==1], pik[Ind[3,]==1]) -HT4 <- HT(y[Ind[4,]==1], pik[Ind[4,]==1]) -HT5 <- HT(y[Ind[5,]==1], pik[Ind[5,]==1]) -HT6 <- HT(y[Ind[6,]==1], pik[Ind[6,]==1]) -HT7 <- HT(y[Ind[7,]==1], pik[Ind[7,]==1]) -HT8 <- HT(y[Ind[8,]==1], pik[Ind[8,]==1]) -HT9 <- HT(y[Ind[9,]==1], pik[Ind[9,]==1]) -HT10 <- HT(y[Ind[10,]==1], pik[Ind[10,]==1]) -HT11 <- HT(y[Ind[11,]==1], pik[Ind[11,]==1]) -HT12 <- HT(y[Ind[12,]==1], pik[Ind[12,]==1]) -HT13 <- HT(y[Ind[13,]==1], pik[Ind[13,]==1]) -HT14 <- HT(y[Ind[14,]==1], pik[Ind[14,]==1]) -HT15 <- HT(y[Ind[15,]==1], pik[Ind[15,]==1]) -# The HT estimates arranged in a vector -Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10, HT11, HT12, HT13, - HT14, HT15) -Est -# The HT is actually desgn-unbiased -data.frame(Ind, Est, p) -sum(Est*p) -sum(y) +y <- c(32, 34, 46, 89, 35) +Ind <- Ik(N, n) +pik <- as.vector(Pik(p, Ind)) +# Select first sample (units 1 and 2) +sam <- c(1, 2) +HT(y[sam], pik[sam]) +} +\references{ +Horvitz, D.G. and Thompson, D.J. (1952). A generalization of sampling +without replacement from a finite universe. +\emph{Journal of the American Statistical Association}, 47, 663-685.\cr +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer. +} +\seealso{ +\code{\link{VarHT}}, \code{\link{E.SI}}, \code{\link{E.piPS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/IPFP.rd b/man/IPFP.rd index 9b5a7d2..6e213f5 100644 --- a/man/IPFP.rd +++ b/man/IPFP.rd @@ -1,73 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IPFP.r \name{IPFP} \alias{IPFP} -\title{Iterative Proportional Fitting Procedure} -\description{Adjustment of a table on the margins} +\title{Iterative Proportional Fitting Procedure (Raking)} \usage{ -IPFP(Table, Col.knw, Row.knw, tol=0.0001) +IPFP(Table, Col.knw, Row.knw, tol = 1e-04) } \arguments{ -\item{Table}{A contingency table} -\item{Col.knw}{A vector containing the true totals of the columns} -\item{Row.knw}{A vector containing the true totals of the Rows} -\item{tol}{The control value, by default equal to 0.0001} +\item{Table}{A matrix or data frame of initial cell counts or weights to +be adjusted.} + +\item{Col.knw}{Numeric vector of known column marginal totals.} + +\item{Row.knw}{Numeric vector of known row marginal totals.} + +\item{tol}{Convergence tolerance. The algorithm stops when the total +absolute deviation between known and estimated marginals is below +\code{tol}. Default is \code{0.0001}.} } -\details{Adjust a contingency table on the know margins of the population with the Raking Ratio method} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Deming, W. & Stephan, F. (1940), On a least squares adjustment of a sampled frequency -table when the expected marginal totals are known. \emph{Annals of Mathematical Statistics}, 11, 427-444.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\value{ +A matrix with \code{nrow(Table) + 1} rows and \code{ncol(Table) + 1} +columns containing the adjusted cell counts, with an added row of +estimated column totals and an added column of estimated row totals. +} +\description{ +Adjusts a contingency table so that its row and column marginals match +known population totals, using the Iterative Proportional Fitting +Procedure (IPFP), also known as raking or RAS algorithm. +} +\details{ +The algorithm alternates between row and column adjustments until +convergence. At each step, cells in each row (or column) are multiplied +by the ratio of the known marginal to the current estimated marginal. +Convergence is assessed by the sum of absolute differences between +known and estimated marginals. } \examples{ -############ -## Example 1 -############ -# Some example of Ardilly and Tille -Table <- matrix(c(80,90,10,170,80,80,150,210,130),3,3) -rownames(Table) <- c("a1", "a2","a3") -colnames(Table) <- c("b1", "b2","b3") -# The table with labels -Table -# The known and true margins -Col.knw <- c(150,300,550) -Row.knw <- c(430,360,210) -# The adjusted table -IPFP(Table,Col.knw,Row.knw,tol=0.0001) - -############ -## Example 2 -############ -# Draws a simple random sample -data(Lucy) -attach(Lucy) - -N<-dim(Lucy)[1] -n<-400 -sam<-sample(N,n) -data<-Lucy[sam,] -attach(data) -dim(data) -# Two domains of interest -Doma1<-Domains(Level) -Doma2<-Domains(SPAM) -# Cross tabulate of domains -SPAM.no<-Doma2[,1]*Doma1 -SPAM.yes<-Doma2[,2]*Doma1 -# Estimation -E.SI(N,n,Doma1) -E.SI(N,n,Doma2) -est1 <-E.SI(N,n,SPAM.no)[,2:4] -est2 <-E.SI(N,n,SPAM.yes)[,2:4] -est1;est2 -# The contingency table estimated from above -Table <- cbind(est1[1,],est2[1,]) -rownames(Table) <- c("Big", "Medium","Small") -colnames(Table) <- c("SPAM.no", "SPAM.yes") -# The known and true margins -Col.knw <- colSums(Domains(Lucy$SPAM)) -Row.knw<- colSums(Domains(Lucy$Level)) -# The adjusted table -IPFP(Table,Col.knw,Row.knw,tol=0.0001) +# A 2x2 table to be raked to known marginals +Table <- matrix(c(10, 20, 30, 40), nrow = 2) +Row.knw <- c(40, 60) +Col.knw <- c(35, 65) +IPFP(Table, Col.knw, Row.knw) +} +\references{ +Deming, W.E. and Stephan, F.F. (1940). On a least squares adjustment of +a sampled frequency table when the expected marginal totals are known. +\emph{Annals of Mathematical Statistics}, 11(4), 427-444.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Domains}}, \code{\link{Wk}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/Ik.rd b/man/Ik.rd index be3a90a..a3a0945 100644 --- a/man/Ik.rd +++ b/man/Ik.rd @@ -1,33 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Ik.r \name{Ik} \alias{Ik} -\title{Sample Membership Indicator} -\description{Creates a matrix of values (0, if the unit belongs to a specified sample and 1, otherwise) -for every possible sample under fixed sample size designs without replacement} +\title{Sample Membership Indicator Matrix} \usage{ Ik(N, n) } \arguments{ -\item{N}{Population size} -\item{n}{Sample size} +\item{N}{Population size. Recommended \code{N <= 15}.} + +\item{n}{Sample size.} } -\seealso{ -\code{\link{Support}, \link{Pik}} +\value{ +A binary matrix of dimension \code{choose(N, n) x N}, where entry +\eqn{(s, k) = 1} if unit \eqn{k} belongs to sample \eqn{s}, and 0 +otherwise. } -\value{The function returns a matrix of \eqn{binom(N)(n)} rows and \eqn{N} columns. The \eqn{k}th column corresponds to the sample -membership indicator, of the \eqn{k}th unit, to a possible sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Constructs the indicator matrix of the sampling support for a fixed-size +without-replacement design. Each row corresponds to one possible sample +and each column to one population unit. +} +\details{ +The full enumeration of all \code{choose(N, n)} possible samples is +computationally feasible only for small populations. For \code{N > 15} +this function will be very slow. It is intended primarily for theoretical +illustrations and teaching purposes. } \examples{ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) +# All possible samples of size n = 2 from N = 4 units +N <- 4 n <- 2 -# The sample membership matrix for fixed size without replacement sampling designs -Ik(N,n) -# The first unit, Yves, belongs to the first four possible samples +Ik(N, n) +# Number of rows equals choose(N, n) = 6 +nrow(Ik(N, n)) == choose(N, n) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Pik}}, \code{\link{Pikl}}, \code{\link{Support}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/IkRS.rd b/man/IkRS.rd index 8020380..e8777ac 100644 --- a/man/IkRS.rd +++ b/man/IkRS.rd @@ -1,32 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IkRS.r \name{IkRS} \alias{IkRS} -\title{Sample Membership Indicator for Random Size sampling designs} -\description{Creates a matrix of values (0, if the unit belongs to a specified sample and 1, otherwise) -for every possible sample under random sample size designs without replacement} +\title{Sample Membership Indicator Matrix for All Possible Sample Sizes} \usage{ IkRS(N) } \arguments{ -\item{N}{Population size} +\item{N}{Population size. Recommended \code{N <= 10}.} } -\seealso{ -\code{\link{SupportRS}, \link{Pik}} +\value{ +A binary matrix with \eqn{2^N} rows (one per non-empty subset, including +the empty set as the first row of zeros) and \code{N} columns. Entry +\eqn{(s, k) = 1} if unit \eqn{k} belongs to subset \eqn{s}. } -\value{The function returns a matrix of \eqn{2^N} rows and \eqn{N} columns. The \eqn{k}th column corresponds to the sample -membership indicator, of the \eqn{k}th unit, to a possible sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Constructs the indicator matrix of the complete sampling support, stacking +the indicator matrices for all sample sizes from 1 to \code{N}. This +covers every possible non-empty subset of the population. +} +\details{ +This function calls \code{\link{Ik}} for each possible sample size +\eqn{n = 1, \ldots, N} and stacks the results. It is intended for small +populations only (\code{N <= 10}) due to the exponential growth of the +support size. } \examples{ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -n <- 3 -# The sample membership matrix for fixed size without replacement sampling designs -IkRS(N) -# The first sample is a null one and the last sample is a census +# Full indicator matrix for N = 3 +IkRS(3) +# Number of rows: 1 (empty) + 3 + 3 + 1 = 8 = 2^3 +nrow(IkRS(3)) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Ik}}, \code{\link{SupportRS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/IkWR.rd b/man/IkWR.rd index b49599b..4a3fd5e 100644 --- a/man/IkWR.rd +++ b/man/IkWR.rd @@ -1,32 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IkWR.r \name{IkWR} \alias{IkWR} -\title{Sample Membership Indicator for with Replacements sampling designs} -\description{Creates a matrix of values (1, if the unit belongs to a specified sample and 0, otherwise) -for every possible sample under fixed sample size designs without replacement} +\title{Frequency Indicator Matrix for With-Replacement Sampling} \usage{ IkWR(N, m) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} +\item{N}{Population size. Keep small due to combinatorial growth.} + +\item{m}{Number of draws (sample size with replacement).} } -\seealso{ -\code{\link{nk}, \link{Support}, \link{Pik}} +\value{ +A binary matrix of dimension \code{choose(N+m-1, m) x N}, where entry +\eqn{(s, k) = 1} if unit \eqn{k} appears in the \eqn{s}-th outcome of +the with-replacement support, and 0 otherwise. } -\value{The function returns a matrix of \eqn{binom(N+m-1)(m)} rows and \eqn{N} columns. The \eqn{k}th column corresponds to the sample -membership indicator, of the \eqn{k}th unit, to a possible sample. It returns a value of 1, even if the element is selected more than once in a with replacement sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Constructs the indicator matrix of the with-replacement sampling support +for a population of size \code{N} and \code{m} draws. Each row corresponds +to one possible ordered outcome and each column to one population unit, +with entry \eqn{(s, k) = 1} if unit \eqn{k} was selected at least once +in outcome \eqn{s}. +} +\details{ +The with-replacement support is enumerated via \code{\link{SupportWR}}. +This function is intended for small populations and few draws only, as the +support grows rapidly with \code{N} and \code{m}. } \examples{ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) +# With-replacement support: N = 3 units, m = 2 draws +N <- 3 m <- 2 -# The sample membership matrix for fixed size without replacement sampling designs -IkWR(N,m) +IkWR(N, m) +# Number of rows = choose(N + m - 1, m) = choose(4, 2) = 6 +nrow(IkWR(N, m)) == choose(N + m - 1, m) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Ik}}, \code{\link{SupportWR}}, \code{\link{nk}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/Lucy.rd b/man/Lucy.rd deleted file mode 100644 index 99aaf76..0000000 --- a/man/Lucy.rd +++ /dev/null @@ -1,54 +0,0 @@ -\name{Lucy} -\docType{data} -\alias{Lucy} -\title{Some Business Population Database} -\description{ -This data set corresponds to a random sample of BigLucy. It contains some financial variables of 2396 industrial companies of a city in a particular fiscal year. -} -\seealso{ -\code{\link{BigLucy}, \link{BigCity}} -} -\usage{data(Lucy)} -\format{ - \describe{ -\item{ID}{The identifier of the company. It correspond to an alphanumeric sequence (two letters and three digits)} -\item{Ubication}{The address of the principal office of the company in the city} -\item{Level}{The industrial companies are discrimitnated according to the Taxes declared. -There are small, medium and big companies} -\item{Zone}{The city is divided by geoghrafical zones. A company is classified in a particular zone according to its address} -\item{Income}{The total ammount of a company's earnings (or profit) in the previuos fiscal year. It is calculated by taking -revenues and adjusting for the cost of doing business} -\item{Employees}{The total number of persons working for the company in the previuos fiscal year} -\item{Taxes}{The total ammount of a company's income Tax} -\item{SPAM}{Indicates if the company uses the Internet and WEBmail options in order to make self-propaganda.} -} -} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} - -\examples{ -data(Lucy) -attach(Lucy) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -# The population totals -colSums(estima) -# Some parameters of interest -table(SPAM,Level) -xtabs(Income ~ Level+SPAM) -# Correlations among characteristics of interest -cor(estima) -# Some useful histograms -hist(Income) -hist(Taxes) -hist(Employees) -# Some useful plots -boxplot(Income ~ Level) -barplot(table(Level)) -pie(table(SPAM)) -} -\keyword{datasets} diff --git a/man/OrderWR.rd b/man/OrderWR.rd index f9d31fb..bf37d1e 100644 --- a/man/OrderWR.rd +++ b/man/OrderWR.rd @@ -1,54 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/OrderWR.r \name{OrderWR} \alias{OrderWR} -\title{Pseudo-Support for Fixed Size With Replacement Sampling Designs} -\description{Creates a matrix containing every possible ordered sample under fixed sample size with replacement designs} +\title{Ordered With-Replacement Sampling Support} \usage{ -OrderWR(N,m,ID=FALSE) +OrderWR(N, m, ID = FALSE) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} -\item{ID}{By default FALSE, a vector of values (numeric or string) identifying each unit in the population} +\item{N}{Population size.} + +\item{m}{Number of draws.} + +\item{ID}{Optional vector of population labels of length \code{N}. +If provided, labels are substituted for integer indices in the output. +If \code{FALSE} (default), integer indices are returned.} } -\seealso{ -\code{\link{SupportWR}, \link{Support}} +\value{ +A matrix with \code{N^m} rows and \code{m} columns, where each row is one +ordered sequence of draws. If \code{ID} is provided, population labels are +substituted for indices. } -\details{The number of samples in a with replacement support is not equal to the number -of ordered samples induced by a with replacement sampling design.} -\value{The function returns a matrix of \eqn{N^m} rows and \eqn{m} columns. Each row of this matrix -corresponds to a possible ordered sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}. The author acknowledges to Hanwen Zhang for valuable suggestions.} -\references{ -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas +\description{ +Enumerates all ordered sequences of \code{m} draws from a population of +size \code{N} with replacement. Unlike \code{\link{SupportWR}}, this +function considers order, so sequences that differ only in draw order are +treated as distinct outcomes. +} +\details{ +The total number of ordered with-replacement sequences of size \code{m} +from \code{N} units is \eqn{N^m}. This grows rapidly and the function +should only be used for small \code{N} and \code{m}. } \examples{ -# Vector U contains the label of a population -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -# Under this context, there are five (5) possible ordered samples -OrderWR(N,1) -# The same output, but labeled -OrderWR(N,1,ID=U) -# y is the variable of interest -y<-c(32,34,46,89,35) -OrderWR(N,1,ID=y) - -# If the smaple size is m=2, there are (25) possible ordered samples -OrderWR(N,2) -# The same output, but labeled -OrderWR(N,2,ID=U) -# y is the variable of interest -y<-c(32,34,46,89,35) -OrderWR(N,2,ID=y) +# All ordered sequences of 2 draws from N = 3 units +OrderWR(N = 3, m = 2) +# N^m = 9 rows -# Note that the number of ordered samples is not equal to the number of -# samples in a well defined with-replacement support -OrderWR(N,2) -SupportWR(N,2) - -OrderWR(N,4) -SupportWR(N,4) +# With population labels +U <- c("A", "B", "C") +OrderWR(N = 3, m = 2, ID = U) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{SupportWR}}, \code{\link{IkWR}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/Pik.rd b/man/Pik.rd index 1b2145f..b61bf7d 100644 --- a/man/Pik.rd +++ b/man/Pik.rd @@ -1,45 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Pik.r \name{Pik} \alias{Pik} -\title{Inclusion Probabilities for Fixed Size Without Replacement Sampling Designs} -\description{Computes the first-order inclusion probability of each unit in the population given a -fixed sample size design} +\title{First-Order Inclusion Probabilities from a Sampling Design} \usage{ Pik(p, Ind) } \arguments{ -\item{p}{A vector containing the selection probabilities of a fixed size without replacement sampling design. The sum of the values of this vector must be one} -\item{Ind}{A sample membership indicator matrix} +\item{p}{Vector of probabilities for each possible sample in the support. +Must sum to 1.} + +\item{Ind}{Indicator matrix of the sampling support, as returned by +\code{\link{Ik}}. Rows are samples, columns are population units.} } -\seealso{ -\code{\link{HT}} +\value{ +A row vector (1 x N matrix) of first-order inclusion probabilities +\eqn{\pi_k = P(k \in s)} for each unit \eqn{k} in the population. } -\details{The inclusion probability of the \eqn{k}th unit is defined as the probability that this unit will be -included in a sample, it is denoted by \eqn{\pi_k} and obtained from a given sampling design as follows: -\deqn{\pi_k=\sum_{s\ni k}p(s)} +\description{ +Computes the first-order inclusion probabilities for each unit in a finite +population, given the probability of each possible sample and the indicator +matrix of the sampling support. } -\value{The function returns a vector of inclusion probabilities for each unit in the finite population.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\details{ +The inclusion probability of unit \eqn{k} is computed as the sum of the +probabilities of all samples that contain unit \eqn{k}: +\deqn{\pi_k = \sum_{s \ni k} p(s)} +The indicator matrix \code{Ind} (output of \code{\link{Ik}}) has one row +per possible sample and one column per population unit, with entry 1 if +unit \eqn{k} is in sample \eqn{s} and 0 otherwise. } \examples{ -# Vector U contains the label of a population of size N=5 +# Population of size N = 5, sample size n = 2 U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") N <- length(U) -# The sample size is n=2 n <- 2 -# The sample membership matrix for fixed size without replacement sampling designs -Ind <- Ik(N,n) -# p is the probability of selection of every sample. +# Sample probabilities (one per possible sample) p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -# Note that the sum of the elements of this vector is one -sum(p) -# Computation of the inclusion probabilities -inclusion <- Pik(p, Ind) -inclusion -# The sum of inclusion probabilities is equal to the sample size n=2 -sum(inclusion) -} -\keyword{survey} +Ind <- Ik(N, n) +pik <- Pik(p, Ind) +pik +# Check: inclusion probabilities sum to n +sum(pik) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Ik}}, \code{\link{Pikl}}, \code{\link{PikPPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/PikHol.rd b/man/PikHol.rd index 2dfb312..23fad66 100644 --- a/man/PikHol.rd +++ b/man/PikHol.rd @@ -1,103 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PikHol.r \name{PikHol} \alias{PikHol} -\title{Optimal Inclusion Probabilities Under Multi-purpose Sampling} -\description{Computes the population vector of optimal inclusion probabilities under the Holmbergs's Approach} +\title{Optimal Inclusion Probabilities for Multiple Surveys (Holmberg)} \usage{ -PikHol(n, sigma, e, Pi) +PikHol(n, sigma, e, Pi = NULL) } \arguments{ -\item{n}{Vector of optimal sample sizes for each of the characteristics of interest.} -\item{sigma}{A matrix containing the size measures for each characteristics of interest.} -\item{e}{Maximum allowed error under the ANOREL approach.} -\item{Pi}{Matrix of first order inclusion probabilities. By default, this probabilites are -proportional to each sigma.} +\item{n}{Integer vector of length \code{p} with the desired sample size +for each of the \code{p} surveys.} + +\item{sigma}{Matrix of dimension \code{N x p} where column \eqn{k} contains +the auxiliary size variable for survey \eqn{k}.} + +\item{e}{Scalar. Relative tolerance parameter controlling the precision +target across surveys.} + +\item{Pi}{Optional matrix of dimension \code{N x p} with initial inclusion +probabilities for each survey. If omitted, \code{\link{PikPPS}} is used.} } -\details{Assuming that all of the characteristic of interest are equally important, the Holmberg's sampling design -yields the following inclusion probabilities -\deqn{\pi_{(opt)k}=\frac{n^*\sqrt{a_{qk}}}{\sum_{k\in U}\sqrt{a_{qk}}}} -where -\deqn{n^*\geq \frac{(\sum_{k\in U}\sqrt{a_{qk}})^2}{(1+c)Q+\sum_{k\in U}a_{qk}}} -and -\deqn{a_{qk}= \sum_{q=1}^Q \frac{\sigma^2_{qk}}{\sum_{k\in U}\left( \frac{1}{\pi_{qk}}-1\right)\sigma^2_{qk}}} -Note that \eqn{\sigma^2_{qk}} is a size measure associated with the k-th element in the q-th characteristic of interest. +\value{ +A numeric vector of length \code{N} with the optimal inclusion probability +for each unit in the population. } -\value{The function returns a vector of inclusion probabilities.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Holmberg, A. (2002), On the Choice of Sampling Design under GREG Estimation in Multiparameter Surveys. -\emph{RD Department, Statistics Sweden}.\cr -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas +\description{ +Computes optimal first-order inclusion probabilities for a population that +is surveyed on multiple occasions, minimising a measure of total variance +across surveys. This implements the approach of Holmberg (2002) for +coordinated sampling over time. +} +\details{ +For each survey \eqn{k}, the initial inclusion probabilities are computed +via \code{\link{PikPPS}}. An optimal composite size measure is then derived +by combining the per-survey auxiliary variables through a weighted sum, and +the final inclusion probabilities are computed proportional to the square +root of this composite. The resulting sample size \code{n.st} is chosen to +minimise total variance subject to a relative precision target \code{e}. } \examples{ - -####################### -#### First example #### -####################### - -# Uses the Lucy data to draw an otpimal sample -# in a multipurpose survey context -data(Lucy) +data('Lucy') attach(Lucy) -# Different sample sizes for two characteristics of interest: Employees and Taxes -N <- dim(Lucy)[1] -n <- c(350,400) -# The size measure is the same for both characteristics of interest, -# but the relationship in between is different -sigy1 <- sqrt(Income^(1)) -sigy2 <- sqrt(Income^(2)) -# The matrix containign the size measures for each characteristics of interest -sigma<-cbind(sigy1,sigy2) -# The vector of optimal inclusion probabilities under the Holmberg's approach -Piks<-PikHol(n,sigma,0.03) -# The optimal sample size is given by the sum of piks -n=round(sum(Piks)) -# Performing the S.piPS function in order to select the optimal sample of size n -res<-S.piPS(n,Piks) -sam <- res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# Pik.s is the vector of inclusion probability of every single unit -# in the selected sample -Pik.s <- res[,2] -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -E.piPS(estima,Pik.s) - -######################## -#### Second example #### -######################## - -# We can define our own first inclusion probabilities -data(Lucy) -attach(Lucy) - -N <- dim(Lucy)[1] -n <- c(350,400) - -sigy1 <- sqrt(Income^(1)) -sigy2 <- sqrt(Income^(2)) -sigma<-cbind(sigy1,sigy2) -pikas <- cbind(rep(400/N, N), rep(400/N, N)) - -Piks<-PikHol(n,sigma,0.03, pikas) - -n=round(sum(Piks)) -n - -res<-S.piPS(n,Piks) -sam <- res[,1] - -data <- Lucy[sam,] -attach(data) -names(data) - -Pik.s <- res[,2] -estima <- data.frame(Income, Employees, Taxes) -E.piPS(estima,Pik.s) +# Two surveys with different auxiliary variables +sigma <- cbind(Employees, Income) +n <- c(100, 150) +pik <- PikHol(n, sigma, e = 0.1) +sum(pik <= 1) # all valid probabilities +} +\references{ +Holmberg, A. (2002). A multiparameter perspective on the choice of sampling +design in surveys. \emph{Statistics in Transition}, 5(6), 969-994.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{PikPPS}}, \code{\link{PikSTPPS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/PikPPS.rd b/man/PikPPS.rd index 4072ec9..aac894f 100644 --- a/man/PikPPS.rd +++ b/man/PikPPS.rd @@ -1,75 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PikPPS.r \name{PikPPS} \alias{PikPPS} -\title{Inclusion Probabilities in Proportional to Size Sampling Designs} -\description{For a given sample size, this function returns a vector of first order -inclusion probabilities for a sampling design proportional to an auxiliary variable} +\title{Inclusion Probabilities Proportional to Size} \usage{ -PikPPS(n,x) +PikPPS(n, x) } \arguments{ -\item{n}{Integer indicating the sample size} -\item{x}{Vector of auxiliary information for each unit in the population} +\item{n}{Desired sample size.} + +\item{x}{Vector of length \code{N} with positive auxiliary size values +for each unit in the population.} } -\seealso{ -\code{\link{PikHol}, \link{E.piPS}, \link{S.piPS}} +\value{ +A numeric vector of length \code{N} with the first-order inclusion +probability for each unit in the population. Values are in \code{(0, 1]}. } -\details{For a given vector of auxiliary information with value \eqn{x_k} for the \eqn{k}-th unit and -population total \eqn{t_x}, the following expression -\deqn{\pi_k=n\times \frac{x_k}{t_x}} -is not always less than unity. A sequential algorithm must be used in order to ensure that for every -unit in the population the inclusion probability gives less or equal to unity.} -\value{The function returns a vector of inclusion probabilities of size \eqn{N}. -Every element of this vector is a value between zero and one.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Computes first-order inclusion probabilities proportional to an auxiliary +size variable \code{x} for a without-replacement sample of size \code{n}. +A sequential truncation algorithm ensures all probabilities are at most 1. +} +\details{ +The initial probabilities \eqn{\pi_k = n x_k / \sum x} may exceed 1 for +large units. The algorithm iteratively sets those probabilities to 1 and +redistributes the remaining sample size among the other units until all +probabilities are valid. The result satisfies \eqn{\sum \pi_k = n}. } \examples{ -############ -## Example 1 -############ -x <- c(30,41,50,170,43,200) -n <- 3 -# Two elements yields values bigger than one -n*x/sum(x) -# With this functions, all of the values are between zero and one -PikPPS(n,x) -# The sum is equal to the sample size -sum(PikPPS(n,x)) - -############ -## Example 2 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# The auxiliary information -x <- c(52, 60, 75, 100, 50) -# Gives the inclusion probabilities for the population accordin to a -# proportional to size design without replacement of size n=4 -pik <- PikPPS(4,x) -pik -# The selected sample is -sum(pik) - -############ -## Example 3 -############ -# Uses the Lucy data to compute teh vector of inclusion probabilities -# accordind to a piPS without replacement design -data(Lucy) +data('Lucy') attach(Lucy) -# The sample size -n=400 -# The selection probability of each unit is proportional to the variable Income -pik <- PikPPS(n,Income) -# The inclusion probabilities of the units in the sample -pik -# The sum of the values in pik is equal to the sample size -sum(pik) -# According to the design some elements must be selected -# They are called forced inclusion units -which(pik==1) +N <- nrow(Lucy) +n <- 400 +Pik <- PikPPS(n, Employees) +# Check: sum equals n +sum(Pik) +# All values are valid probabilities +all(Pik > 0 & Pik <= 1) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.piPS}}, \code{\link{PikSTPPS}}, \code{\link{PikHol}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/Pikl.rd b/man/Pikl.rd index 8d94cc2..7bb1e49 100644 --- a/man/Pikl.rd +++ b/man/Pikl.rd @@ -1,42 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Pikl.r \name{Pikl} \alias{Pikl} -\title{Second Order Inclusion Probabilities for Fixed Size Without Replacement Sampling Designs} -\description{Computes the second-order inclusion probabilities of each par of units in the population given a -fixed sample size design} +\title{Second-Order Inclusion Probabilities} \usage{ Pikl(N, n, p) } \arguments{ -\item{N}{Population size} -\item{n}{Sample size} -\item{p}{A vector containing the selection probabilities of a fixed size without replacement sampling design. The sum of the values of this vector must be one} +\item{N}{Population size. Keep small (recommended \code{N <= 15}) due to +the combinatorial enumeration of all possible samples.} + +\item{n}{Sample size.} + +\item{p}{Vector of probabilities for each possible sample in the support. +Must sum to 1.} } -\seealso{ -\code{\link{VarHT}, \link{Deltakl}, \link{Pik}} +\value{ +An \code{N x N} matrix where entry \eqn{(k, l)} is the probability that +both units \eqn{k} and \eqn{l} are included in the same sample. Diagonal +entries \eqn{(k,k)} equal the first-order inclusion probability \eqn{\pi_k}. } -\details{The second-order inclusion probability of the \eqn{kl}th units is defined as the probability that unit \eqn{k} and unit -\eqn{l} will be both included in a sample; it is denoted by \eqn{\pi_{kl}} and obtained from a given sampling design as follows: -\deqn{\pi_{kl}=\sum_{s\ni k,l}p(s)} +\description{ +Computes the matrix of second-order inclusion probabilities +\eqn{\pi_{kl} = P(k \in s \text{ and } l \in s)} for all pairs of units +in a finite population of size \code{N} under a fixed-size sampling design. } -\value{The function returns a symmetric matrix of size \eqn{N \times N} containing the second-order inclusion probabilities -for each pair of units in the finite population.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\details{ +The second-order inclusion probabilities are needed to compute the exact +Horvitz-Thompson variance estimator and the Sen-Yates-Grundy variance +estimator. This function enumerates the full sampling support via +\code{\link{Ik}} and is therefore only feasible for small populations +(\code{N <= 15}). } \examples{ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -# The sample size is n=2 +# Population N = 5, sample size n = 2 +N <- 5 n <- 2 -# p is the probability of selection of every sample. p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -# Note that the sum of the elements of this vector is one -sum(p) -# Computation of the second-order inclusion probabilities -Pikl(N, n, p) +pi2 <- Pikl(N, n, p) +pi2 +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Pik}}, \code{\link{Deltakl}}, \code{\link{VarHT}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/S.BE.rd b/man/S.BE.rd index ed7f20f..482b0ad 100644 --- a/man/S.BE.rd +++ b/man/S.BE.rd @@ -1,56 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.BE.r \name{S.BE} \alias{S.BE} -\title{Bernoulli Sampling Without Replacement} -\description{Draws a Bernoulli sample without replacement of expected size $n$ from a population of size $N$} +\title{Bernoulli Sampling} \usage{ S.BE(N, prob) } \arguments{ -\item{N}{Population size} -\item{prob}{Inclusion probability for each unit in the population} +\item{N}{Population size.} + +\item{prob}{Scalar. Inclusion probability, must satisfy \code{0 < prob <= 1}.} } -\seealso{ -\code{\link{E.BE}} -} -\details{The selected sample is drawn according to a sequential procedure algorithm based on an uniform distribution. The Bernoulli -sampling design is not a fixed sample size one.} -\value{The function returns a vector of size \eqn{N}. Each element of this vector indicates if the unit was selected. Then, -if the value of this vector for unit \eqn{k} is zero, the unit \eqn{k} was not selected in the sample; otherwise, the unit was -selected in the sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas.\cr -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer. +\value{ +A vector of length \code{N} where selected units contain their population +index and non-selected units contain \code{0}. +} +\description{ +Draws a Bernoulli sample from a finite population of size \code{N}. +Each unit is independently selected with the same inclusion probability +\code{prob}. +} +\details{ +The sample size under Bernoulli sampling is random, following a +Binomial(\code{N}, \code{prob}) distribution. To extract the selected +indices, use \code{sam[sam != 0]}. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Draws a Bernoulli sample without replacement of expected size n=3 -# The inlusion probability is 0.6 for each unit in the population -sam <- S.BE(5,0.6) -sam -# The selected sample is -U[sam] +# Population of size N = 100, inclusion probability 10\% +N <- 100 +prob <- 0.1 +sam <- S.BE(N, prob) -############ -## Example 2 -############ -# Uses the Lucy data to draw a Bernoulli sample +# Extract selected indices +selected <- sam[sam != 0] +length(selected) # random, around 10 -data(Lucy) -attach(Lucy) -N <- dim(Lucy)[1] -# The population size is 2396. If the expected sample size is 400 -# then, the inclusion probability must be 400/2396=0.1669 -sam <- S.BE(N,0.01669) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} +# Using Lucy data +data('Lucy') +N <- nrow(Lucy) +prob <- 0.05 +sam <- S.BE(N, prob) +sam <- sam[sam != 0] +y <- data.frame(Income = Lucy$Income[sam]) +E.BE(y, prob) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.BE}}, \code{\link{S.PO}}, \code{\link{S.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/S.PO.rd b/man/S.PO.rd index f3934af..dbf2f3c 100644 --- a/man/S.PO.rd +++ b/man/S.PO.rd @@ -1,59 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.PO.r \name{S.PO} \alias{S.PO} \title{Poisson Sampling} -\description{Draws a Poisson sample of expected size $n$ from a population of size $N$} \usage{ S.PO(N, Pik) } \arguments{ -\item{N}{Population size} -\item{Pik}{Vector of inclusion probabilities for each unit in the population} +\item{N}{Population size.} + +\item{Pik}{Vector of length \code{N} containing the first-order inclusion +probability for each unit in the population. Values must be in \code{(0, 1]}.} } -\seealso{ -\code{\link{E.PO}} -} -\details{The selected sample is drawn according to a sequential procedure algorithm based on a uniform distribution. The Poisson -sampling design is not a fixed sample size one.} -\value{The function returns a vector of size \eqn{N}. Each element of this vector indicates if the unit was selected. Then, -if the value of this vector for unit \eqn{k} is zero, the unit \eqn{k} was not selected in the sample; otherwise, the unit was -selected in the sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas.\cr -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer. +\value{ +A vector of length \code{N} where selected units contain their population +index and non-selected units contain \code{0}. +} +\description{ +Draws a Poisson sample from a finite population of size \code{N}. +Each unit \eqn{k} is independently selected with its own inclusion +probability \eqn{\pi_k}. +} +\details{ +Poisson sampling is a generalisation of Bernoulli sampling that allows +unequal inclusion probabilities. The sample size is random. To extract +the selected indices, use \code{sam[sam != 0]}. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Draws a Bernoulli sample without replacement of expected size n=3 -# "Erik" is drawn in every possible sample becuse its inclusion probability is one -Pik <- c(0.5, 0.2, 1, 0.9, 0.5) -sam <- S.PO(5,Pik) -sam -# The selected sample is -U[sam] - -############ -## Example 2 -############ -# Uses the Lucy data to draw a Poisson sample -data(Lucy) +data('Lucy') attach(Lucy) -N <- dim(Lucy)[1] -n <- 400 -Pik<-n*Income/sum(Income) -# None element of Pik bigger than one -which(Pik>1) -# The selected sample -sam <- S.PO(N,Pik) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} +N <- nrow(Lucy) +n <- 400 +Pik <- PikPPS(n, Employees) +sam <- S.PO(N, Pik) +sam <- sam[sam != 0] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.PO(y, Pik[sam]) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.PO}}, \code{\link{PikPPS}}, \code{\link{S.piPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/S.PPS.rd b/man/S.PPS.rd index e3922f2..db3d8bd 100644 --- a/man/S.PPS.rd +++ b/man/S.PPS.rd @@ -1,55 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.PPS.r \name{S.PPS} \alias{S.PPS} -\title{Probability Proportional to Size Sampling With Replacement} -\description{Draws a probability proportional to size sample with replacement of size \eqn{m} from a population of size \eqn{N}} +\title{Probability Proportional to Size With-Replacement Sampling} \usage{ -S.PPS(m,x) +S.PPS(m, x) } \arguments{ -\item{m}{Sample size} -\item{x}{Vector of auxiliary information for each unit in the population} +\item{m}{Number of draws (sample size with replacement).} + +\item{x}{Vector of length \code{N} containing positive auxiliary size +values for each unit in the population.} } -\seealso{ -\code{\link{E.PPS}} +\value{ +A matrix with \code{m} rows and two columns: +\itemize{ + \item Column 1 (\code{sam}): population indices of the selected units. + \item Column 2 (\code{pk}): selection probability of each draw. } -\details{The selected sample is drawn according to the cumulative total method (sequential-list procedure)} -\value{The function returns a matrix of \eqn{m} rows and two columns. Each element of the first column indicates the unit that -was selected. Each element of the second column indicates the selection probability of this unit} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Draws a with-replacement sample of size \code{m} from a finite population +using probabilities proportional to an auxiliary size variable \code{x}. +} +\details{ +At each draw, unit \eqn{k} is selected with probability +\eqn{p_k = x_k / \sum x}. Since sampling is with replacement, the same +unit may appear more than once. Use \code{\link{E.PPS}} or \code{\link{HH}} +to estimate population totals from this sample. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# The auxiliary information -x <- c(52, 60, 75, 100, 50) -# Draws a PPS sample with replacement of size m=3 -res <- S.PPS(3,x) -sam <- res[,1] -# The selected sample is -U[sam] - -############ -## Example 2 -############ -# Uses the Lucy data to draw a random sample according to a -# PPS with replacement design -data(Lucy) +data('Lucy') attach(Lucy) -# The selection probability of each unit is proportional to the variable Income -m <- 400 -res<-S.PPS(400,Income) -# The selected sample -sam <- res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} +m <- 400 +res <- S.PPS(m, Employees) +sam <- res[, 1] +pk <- res[, 2] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.PPS(y, pk) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.PPS}}, \code{\link{HH}}, \code{\link{S.piPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/S.SI.rd b/man/S.SI.rd index 9d25897..3c31dd8 100644 --- a/man/S.SI.rd +++ b/man/S.SI.rd @@ -1,58 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.SI.r \name{S.SI} \alias{S.SI} \title{Simple Random Sampling Without Replacement} -\description{Draws a simple random sample without replacement of size \eqn{n} from a population of size \eqn{N}} \usage{ -S.SI(N, n, e=runif(N)) +S.SI(N, n, e = runif(N)) } \arguments{ -\item{N}{Population size} -\item{n}{Sample size} -\item{e}{By default, a vector of size \eqn{N} of independent random numbers drawn from the \eqn{Uniform(0,1)}} +\item{N}{Population size.} + +\item{n}{Sample size. Must satisfy \code{n <= N}.} + +\item{e}{Optional vector of \code{N} uniform random variates in \code{(0,1)}. +If omitted, \code{runif(N)} is used. Useful for reproducibility or +coordinated sampling.} } -\seealso{ -\code{\link{E.SI}} -} -\details{The selected sample is drawn according to a selection-rejection (list-sequential) algorithm} -\value{The function returns a vector of size \eqn{N}. Each element of this vector indicates if the unit was selected. Then, -if the value of this vector for unit \eqn{k} is zero, the unit \eqn{k} was not selected in the sample; otherwise, the unit was -selected in the sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Fan, C.T., Muller, M.E., Rezucha, I. (1962), Development of sampling plans by using sequential -(item by item) selection techniques and digital computer, \emph{Journal of the American Statistical Association}, 57, 387-402.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\value{ +A vector of length \code{N} where selected units contain their population +index and non-selected units contain \code{0}. +} +\description{ +Draws a simple random sample of size \code{n} without replacement from a +finite population of size \code{N} using the sequential algorithm of +Fan, Muller and Rezucha (1962). +} +\details{ +The sequential algorithm selects units one at a time by comparing a uniform +random variate with the conditional inclusion probability at each step, +ensuring exactly \code{n} units are selected. To extract the selected +indices, filter out the zeros: \code{sam[sam != 0]}. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Fixes the random numbers in order to select a sample -# Ideal for teaching purposes in the blackboard -e <- c(0.4938, 0.7044, 0.4585, 0.6747, 0.0640) -# Draws a simple random sample without replacement of size n=3 -sam <- S.SI(5,3,e) -sam -# The selected sample is -U[sam] - -############ -## Example 2 -############ -# Uses the Marco and Lucy data to draw a random sample according to a SI design -data(Marco) -data(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam<-S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} +data('Lucy') +attach(Lucy) +N <- nrow(Lucy) +n <- 400 +sam <- S.SI(N, n) +sam <- sam[sam != 0] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.SI(N, n, y) +} +\references{ +Fan, C.T., Muller, M.E. and Rezucha, I. (1962). Development of sampling +plans by using sequential (item by item) selection techniques and digital +computers. \emph{Journal of the American Statistical Association}, +57(298), 387-402.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.SI}}, \code{\link{S.STSI}}, \code{\link{S.SY}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/S.STPPS.rd b/man/S.STPPS.rd index e08c754..2fa5a04 100644 --- a/man/S.STPPS.rd +++ b/man/S.STPPS.rd @@ -1,74 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.STPPS.r \name{S.STPPS} \alias{S.STPPS} -\title{Stratified Sampling Applying PPS Design in all Strata} -\description{Draws a probability proportional to size simple random sample with -replacement of size \eqn{m_h} in stratum \eqn{h} of size \eqn{N_h}} +\title{Stratified Probability Proportional to Size With-Replacement Sampling} \usage{ -S.STPPS(S,x,mh) +S.STPPS(S, x, mh) } \arguments{ -\item{S}{Vector identifying the membership to the strata of each unit in the population} -\item{x}{Vector of auxiliary information for each unit in the population} -\item{mh}{Vector of sample size in each stratum} +\item{S}{Vector of length \code{N} identifying the stratum membership of +each unit in the population.} + +\item{x}{Vector of length \code{N} containing positive auxiliary size +values for each unit in the population.} + +\item{mh}{Integer vector of length \code{H} specifying the number of +draws within each stratum.} } -\seealso{ -\code{\link{E.STPPS}} +\value{ +A data frame with \code{sum(mh)} rows and two columns: +\itemize{ + \item \code{sam}: population indices of the selected units. + \item \code{pk}: within-stratum selection probabilities of each draw. } -\details{The selected sample is drawn according to the cumulative total method (sequential-list procedure) in each stratum} -\value{The function returns a matrix of \eqn{m=m_1+\cdots+m_h} rows and two columns. Each element of the first column indicates the unit that -was selected. Each element of the second column indicates the selection probability of this unit} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Draws a stratified with-replacement sample where within each stratum units +are selected using probability proportional to size (PPS-WR). +} +\details{ +Within each stratum \eqn{h}, \code{mh[h]} draws are made with +probabilities \eqn{p_k = x_k / \sum_{k \in h} x_k}. The same unit may +appear more than once within a stratum. Use \code{\link{E.STPPS}} to +estimate population totals from this sample. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# The auxiliary information -x <- c(52, 60, 75, 100, 50) -# Vector Strata contains an indicator variable of stratum membership -Strata <- c("A", "A", "A", "B", "B") -# Then sample size in each stratum -mh <- c(2,2) -# Draws a stratified PPS sample with replacement of size n=4 -res <- S.STPPS(Strata, x, mh) -# The selected sample -sam <- res[,1] -U[sam] -# The selection probability of each unit selected to be in the sample -pk <- res[,2] -pk - -############ -## Example 2 -############ -# Uses the Lucy data to draw a stratified random sample -# according to a PPS design in each stratum - -data(Lucy) +data('Lucy') attach(Lucy) -# Level is the stratifying variable -summary(Level) -# Defines the sample size at each stratum -m1<-70 -m2<-100 -m3<-200 -mh<-c(m1,m2,m3) -# Draws a stratified sample -res<-S.STPPS(Level, Income, mh) -# The selected sample -sam<-res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -# The selection probability of each unit selected in the sample -pk <- res[,2] -pk +mh <- c(20, 30, 50) +res <- S.STPPS(Level, Employees, mh) +head(res) +sam <- res$sam +pk <- res$pk +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.STPPS(y, pk, mh, Level[sam]) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.PPS}}, \code{\link{S.STpiPS}}, \code{\link{E.STPPS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/S.STSI.rd b/man/S.STSI.rd index cc47eea..89db823 100644 --- a/man/S.STSI.rd +++ b/man/S.STSI.rd @@ -1,70 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.STSI.r \name{S.STSI} \alias{S.STSI} -\title{Stratified sampling applying SI design in all strata} -\description{Draws a simple random sample without replacement of size \eqn{n_h} in stratum \eqn{h} of size \eqn{N_h}} +\title{Stratified Simple Random Sampling Without Replacement} \usage{ S.STSI(S, Nh, nh) } \arguments{ -\item{S}{Vector identifying the membership to the strata of each unit in the population} -\item{Nh}{Vector of stratum sizes} -\item{nh}{Vector of sample size in each stratum} +\item{S}{Vector of length \code{N} identifying the stratum membership of +each unit in the population.} + +\item{Nh}{Integer vector of length \code{H} with the population size of +each stratum.} + +\item{nh}{Integer vector of length \code{H} with the sample size of each +stratum. Must satisfy \code{nh[h] <= Nh[h]} for all \code{h}.} } -\seealso{ -\code{\link{E.STSI}} +\value{ +A sorted vector of population indices of the selected units, of length +\code{sum(nh)}. } -\details{The selected sample is drawn according to a selection-rejection (list-sequential) algorithm in each stratum} -\value{The function returns a vector of size \eqn{n=n_1+\cdots+n_H}. Each element of this vector indicates the unit that was selected.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Draws a stratified simple random sample without replacement from a finite +population. Within each stratum, units are selected by simple random +sampling without replacement. +} +\details{ +The function selects \code{nh[h]} units from stratum \eqn{h} using +\code{base::sample}, and returns all selected indices sorted in ascending +order. Use \code{\link{E.STSI}} to estimate population totals from this +sample. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector Strata contains an indicator variable of stratum membership -Strata <- c("A", "A", "A", "B", "B") -Strata -# The stratum sizes -Nh <- c(3,2) -# Then sample size in each stratum -nh <- c(2,1) -# Draws a stratified simple random sample without replacement of size n=3 -sam <- S.STSI(Strata, Nh, nh) -sam -# The selected sample is -U[sam] - -############ -## Example 2 -############ -# Uses the Lucy data to draw a stratified random sample -# accordind to a SI design in each stratum -data(Lucy) +data('Lucy') attach(Lucy) -# Level is the stratifying variable -summary(Level) -# Defines the size of each stratum -N1<-summary(Level)[[1]] -N2<-summary(Level)[[2]] -N3<-summary(Level)[[3]] -N1;N2;N3 -Nh <- c(N1,N2,N3) -# Defines the sample size at each stratum -n1<-70 -n2<-100 -n3<-200 -nh<-c(n1,n2,n3) -# Draws a stratified sample +N <- nrow(Lucy) +Nh <- as.numeric(table(Level)) +nh <- c(70, 100, 200) sam <- S.STSI(Level, Nh, nh) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.STSI(Level[sam], Nh, nh, y) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.STSI}}, \code{\link{S.SI}}, \code{\link{S.STpiPS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/S.STpiPS.Rd b/man/S.STpiPS.Rd index b387324..a8d8c86 100644 --- a/man/S.STpiPS.Rd +++ b/man/S.STpiPS.Rd @@ -1,84 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.STpiPS.R \name{S.STpiPS} \alias{S.STpiPS} -\title{Stratified Sampling Applying Without Replacement piPS Design in all Strata} -\description{Draws a probability proportional to size simple random sample without -replacement of size \eqn{n_h} in stratum \eqn{h} of size \eqn{N_h}} +\title{Stratified Probability Proportional to Size Without-Replacement Sampling} \usage{ -S.STpiPS(S,x,nh) +S.STpiPS(S, x, nh) } \arguments{ -\item{S}{Vector identifying the membership to the strata of each unit in the population} -\item{x}{Vector of auxiliary information for each unit in the population} -\item{nh}{Vector of sample size in each stratum} +\item{S}{Vector of length \code{N} identifying the stratum membership of +each unit in the population.} + +\item{x}{Vector of length \code{N} containing positive auxiliary size +values for each unit in the population.} + +\item{nh}{Integer vector of length \code{H} specifying the sample size +within each stratum.} } -\seealso{ -\code{\link{E.STpiPS}} +\value{ +A matrix with \code{sum(nh)} rows and two columns, sorted by population +index: +\itemize{ + \item Column 1: population indices of the selected units. + \item Column 2: first-order inclusion probabilities of the selected units. } -\details{The selected sample is drawn according to the Sunter method (sequential-list procedure) in each stratum} -\value{The function returns a matrix of \eqn{n=n_1+\cdots+n_h} rows and two columns. Each element of the first column indicates the unit that -was selected. Each element of the second column indicates the inclusion probability of this unit} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +} +\description{ +Draws a stratified sample where within each stratum units are selected +using a probability proportional to size without-replacement (piPS) design. +} +\details{ +Within each stratum \eqn{h}, the function calls \code{\link{S.piPS}} to +draw \code{nh[h]} units with probabilities proportional to \code{x}. +The global population indices are preserved in the output. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# The auxiliary information -x <- c(52, 60, 75, 100, 50) -# Vector Strata contains an indicator variable of stratum membership -Strata <- c("A", "A", "A", "B", "B") -# Then sample size in each stratum -mh <- c(2,2) -# Draws a stratified PPS sample with replacement of size n=4 -res <- S.STPPS(Strata, x, mh) -# The selected sample -sam <- res[,1] -U[sam] -# The selection probability of each unit selected to be in the sample -pk <- res[,2] -pk - -############ -## Example 2 -############ -# Uses the Lucy data to draw a stratified random sample -# according to a piPS design in each stratum - -data(Lucy) +data('Lucy') attach(Lucy) -# Level is the stratifying variable -summary(Level) - -# Defines the size of each stratum -N1<-summary(Level)[[1]] -N2<-summary(Level)[[2]] -N3<-summary(Level)[[3]] -N1;N2;N3 - -# Defines the sample size at each stratum -n1<-70 -n2<-100 -n3<-200 -nh<-c(n1,n2,n3) -nh -# Draws a stratified sample -S <- Level -x <- Employees - -res <- S.STpiPS(S, x, nh) -sam<-res[,1] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -# The selection probability of each unit selected in the sample -pik <- res[,2] -pik +N <- nrow(Lucy) +n1 <- 70; n2 <- 100; n3 <- 200 +nh <- c(n1, n2, n3) +res <- S.STpiPS(Level, Employees, nh) +head(res) +sam <- res[, 1] +Pik <- res[, 2] +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.STpiPS(y, Pik, Level[sam]) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.piPS}}, \code{\link{S.STSI}}, \code{\link{E.STpiPS}}, + \code{\link{PikSTPPS}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/S.SY.rd b/man/S.SY.rd index 46e6e13..4955d59 100644 --- a/man/S.SY.rd +++ b/man/S.SY.rd @@ -1,55 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.SY.r \name{S.SY} \alias{S.SY} \title{Systematic Sampling} -\description{Draws a Systematic sample of size $n$ from a population of size $N$} \usage{ S.SY(N, a) } \arguments{ -\item{N}{Population size} -\item{a}{Number of groups dividing the population} +\item{N}{Population size.} + +\item{a}{Sampling interval (skip). The expected sample size is +approximately \code{N/a}.} } -\seealso{ -\code{\link{E.SY}} +\value{ +A vector containing the population indices of the selected units. } -\details{The selected sample is drawn according to a random start.} -\value{The function returns a vector of size \eqn{n}. Each element of this vector indicates the unit that was selected.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}. The author acknowledges to -Kristina Stodolova \email{Kristyna.Stodolova@seznam.cz} for valuable suggestions.} -\references{ -Madow, L.H. and Madow, W.G. (1944), On the theory of systematic sampling. \emph{Annals of Mathematical Statistics}. 15, 1-24.\cr -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling. Springer}.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Draws a systematic sample from a finite population of size \code{N} using +a fixed sampling interval \code{a}. A random start \code{r} is chosen +uniformly from \code{1} to \code{a}, and every \code{a}-th unit thereafter +is selected. +} +\details{ +The random start \code{r} is drawn from \code{sample(a, 1)}, and then +units \eqn{r, r+a, r+2a, \ldots} are selected. If \code{N} is not a +multiple of \code{a}, the sample size varies by one unit depending on the +random start. Use \code{\link{E.SY}} to estimate population totals. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# The population of size N=5 is divided in a=2 groups -# Draws a Systematic sample. -sam <- S.SY(5,2) -sam -# The selected sample is -U[sam] -# There are only two possible samples - -############ -## Example 2 -############ -# Uses the Lucy data to draw a Systematic sample -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -# The population is divided in 6 groups -# The selected sample -sam <- S.SY(N,6) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} +N <- nrow(Lucy) +a <- 10 +sam <- S.SY(N, a) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.SY(N, a, y) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.SY}}, \code{\link{S.SI}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/S.WR.rd b/man/S.WR.rd index 4a8dafa..05f4203 100644 --- a/man/S.WR.rd +++ b/man/S.WR.rd @@ -1,51 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.WR.r \name{S.WR} \alias{S.WR} \title{Simple Random Sampling With Replacement} -\description{Draws a simple random sample witht replacement of size \eqn{m} from a population of size \eqn{N}} \usage{ S.WR(N, m) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} +\item{N}{Population size.} + +\item{m}{Number of draws (sample size with replacement).} } -\seealso{ -\code{\link{E.WR}} +\value{ +A vector of population indices of length \code{m}, where each element is +the index of a selected unit. Units may appear more than once. +} +\description{ +Draws a simple random sample of size \code{m} with replacement from a +finite population of size \code{N}. Returns the frequency of selection +for each unit drawn at least once. } -\details{The selected sample is drawn according to a sequential procedure algorithm based on a binomial distribution} -\value{The function returns a vector of size \eqn{m}. Each element of this vector indicates the unit that was selected.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\details{ +The number of times each unit is selected follows a multinomial +distribution with equal probabilities \eqn{1/N}. The function uses a +sequential binomial draw approach. Use \code{\link{E.WR}} to estimate +population totals. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Draws a simple random sample witho replacement of size m=3 -sam <- S.WR(5,3) -sam -# The selected sample -U[sam] - -############ -## Example 2 -############ -# Uses the Lucy data to draw a random sample of units accordind to a -# simple random sampling with replacement design -data(Lucy) +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -m <- 400 -sam<-S.WR(N,m) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} +N <- nrow(Lucy) +m <- 400 +sam <- S.WR(N, m) +y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) +E.WR(N, m, y) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.WR}}, \code{\link{S.SI}}, \code{\link{S.PPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/S.piPS.rd b/man/S.piPS.rd deleted file mode 100644 index 2ef16df..0000000 --- a/man/S.piPS.rd +++ /dev/null @@ -1,60 +0,0 @@ -\name{S.piPS} -\alias{S.piPS} -\title{Probability Proportional to Size Sampling Without Replacement} -\description{Draws a probability proportional to size sample without replacement of size \eqn{n} from a population of size \eqn{N}.} -\usage{ -S.piPS(n, x, e) -} -\arguments{ -\item{x}{Vector of auxiliary information for each unit in the population} -\item{n}{Sample size} -\item{e}{By default, a vector of size \eqn{N} of independent random numbers drawn from the \eqn{Uniform(0,1)}} -} -\seealso{ -\code{\link{E.piPS}} -} -\details{The selected sample is drawn according to the Sunter method (sequential-list procedure)} -\value{The function returns a matrix of \eqn{m} rows and two columns. Each element of the first column indicates the unit that -was selected. Each element of the second column indicates the selection probability of this unit} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# The auxiliary information -x <- c(52, 60, 75, 100, 50) -# Draws a piPS sample without replacement of size n=3 -res <- S.piPS(3,x) -res -sam <- res[,1] -sam -# The selected sample is -U[sam] - -############ -## Example 2 -############ -# Uses the Lucy data to draw a random sample of units accordind to a -# piPS without replacement design - -data(Lucy) -attach(Lucy) -# The selection probability of each unit is proportional to the variable Income -res <- S.piPS(400,Income) -# The selected sample -sam <- res[,1] -# The inclusion probabilities of the units in the sample -Pik.s <- res[,2] -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -data -dim(data) -} -\keyword{survey} diff --git a/man/Support.rd b/man/Support.rd index d2a1914..23a06a7 100644 --- a/man/Support.rd +++ b/man/Support.rd @@ -1,42 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Support.r \name{Support} \alias{Support} -\title{Sampling Support for Fixed Size Without Replacement Sampling Designs} -\description{Creates a matrix containing every possible sample under fixed sample size designs} +\title{Sampling Support for Fixed-Size Without-Replacement Designs} \usage{ -Support(N, n, ID=FALSE) +Support(N, n, ID = FALSE) } \arguments{ -\item{N}{Population size} -\item{n}{Sample size} -\item{ID}{By default FALSE, a vector of values (numeric or string) identifying each unit in the population} +\item{N}{Population size. Recommended \code{N <= 15}.} + +\item{n}{Sample size.} + +\item{ID}{Optional vector of population labels of length \code{N}. +If provided, labels replace integer indices in the output. +If \code{FALSE} (default), integer indices are returned.} } -\seealso{ -\code{\link{Ik}} +\value{ +A matrix with \code{choose(N, n)} rows and \code{n} columns. Each row +contains the indices (or labels if \code{ID} is provided) of the units +in one possible sample. Samples are listed in lexicographic order. } -\details{A support is defined as the set of samples such that for any sample in the support, all the permutations -of the coordinates of the sample are also in the support} -\value{The function returns a matrix of \eqn{binom(N)(n)} rows and \eqn{n} columns. Each row of this matrix -corresponds to a possible sample} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas +\description{ +Enumerates all possible samples of size \code{n} from a population of +size \code{N}, returning the complete sampling support as a matrix. +} +\details{ +This function uses a combinatorial algorithm to enumerate all +\code{choose(N, n)} subsets of size \code{n} from \eqn{\{1, \ldots, N\}}. +It is intended for small populations only. For \code{N > 15} it becomes +very slow. } \examples{ -# Vector U contains the label of a population +# All samples of size 2 from a population of 5 U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -n <- 2 -# The support for fixed size without replacement sampling designs -# Under this context, there are ten (10) possibles samples -Support(N,n) -# The same support, but labeled -Support(N,n,ID=U) -# y is the variable of interest -y<-c(32,34,46,89,35) -# The following output is very useful when checking -# the design-unbiasedness of an estimator -Support(N,n,ID=y) -} -\keyword{survey} +Support(N = 5, n = 2, ID = U) + +# Integer indices only +Support(N = 5, n = 2) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Ik}}, \code{\link{SupportWR}}, \code{\link{SupportRS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/SupportRS.rd b/man/SupportRS.rd index 5ae8231..8878a79 100644 --- a/man/SupportRS.rd +++ b/man/SupportRS.rd @@ -1,38 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SupportRS.r \name{SupportRS} \alias{SupportRS} -\title{Sampling Support for Random Size Without Replacement Sampling Designs} -\description{Creates a matrix containing every possible sample under random sample size designs} +\title{Complete Sampling Support for All Sample Sizes} \usage{ -SupportRS(N, ID=FALSE) +SupportRS(N, ID = FALSE) } \arguments{ -\item{N}{Population size} -\item{ID}{By default FALSE, a vector of values (numeric or string) identifying each unit in the population} +\item{N}{Population size. Recommended \code{N <= 10}.} + +\item{ID}{Optional vector of population labels of length \code{N}. +If provided, labels replace integer indices in the output.} } -\seealso{ -\code{\link{IkRS}} +\value{ +A matrix with \eqn{2^N} rows and \code{N} columns. Each row is one subset, +with \code{NA} used as padding for subsets smaller than \code{N}. The first +row represents the empty set (all zeros). } -\details{A support is defined as the set of samples such that for any sample in the support, all the permutations of the coordinates of the sample are also in the support} -\value{The function returns a matrix of \eqn{2^N} rows and \eqn{N} columns. Each row of this matrix corresponds to a possible sample} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas +\description{ +Enumerates all possible non-empty subsets of a population of size \code{N}, +covering all sample sizes from 1 to \code{N}. The result includes the +empty set as the first row. +} +\details{ +This function stacks the outputs of \code{\link{Support}} for all sample +sizes \eqn{n = 1, \ldots, N}. It is only feasible for small populations +(\code{N <= 10}) due to exponential growth. } \examples{ -# Vector U contains the label of a population -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -# The support for fixed size without replacement sampling designs -# Under this context, there are ten (10) possibles samples -SupportRS(N) -# The same support, but labeled -SupportRS(N, ID=U) -# y is the variable of interest -y<-c(32,34,46,89,35) -# The following output is very useful when checking -# the design-unbiasedness of an estimator -SupportRS(N, ID=y) -} -\keyword{survey} +# Complete support for N = 3 +SupportRS(3) +# 2^3 = 8 rows +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{Support}}, \code{\link{IkRS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/SupportWR.rd b/man/SupportWR.rd index e101a1a..fd750d0 100644 --- a/man/SupportWR.rd +++ b/man/SupportWR.rd @@ -1,43 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SupportWR.r \name{SupportWR} \alias{SupportWR} -\title{Sampling Support for Fixed Size With Replacement Sampling Designs} -\description{Creates a matrix containing every possible sample under fixed sample size with replacement designs} +\title{Sampling Support for With-Replacement Designs} \usage{ -SupportWR(N, m, ID=FALSE) +SupportWR(N, m, ID = FALSE) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} -\item{ID}{By default FALSE, a vector of values (numeric or string) identifying each unit in the population} +\item{N}{Population size.} + +\item{m}{Number of draws (sample size with replacement).} + +\item{ID}{Optional vector of population labels of length \code{N}. +If \code{FALSE} (default), integer indices are returned.} } -\seealso{ -\code{\link{Support}} +\value{ +A matrix with \code{choose(N+m-1, m)} rows and \code{m} columns. Each +row contains the (sorted) indices of one possible unordered outcome. +If \code{ID} is provided, population labels replace indices. } -\details{A support is defined as the set of samples such that, for any sample in the support, all the permutations -of the coordinates of the sample are also in the support} -\value{The function returns a matrix of \eqn{binom(N+m-1)(m)} rows and \eqn{m} columns. Each row of this matrix -corresponds to a possible sample} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Ortiz, J. E. (2009), \emph{Simulacion y metodos estadisticos}. Editorial Universidad Santo Tomas. \cr -Tille, Y. (2006), \emph{Sampling Algorithms}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Enumerates all distinct unordered outcomes (multisets) of size \code{m} +drawn with replacement from a population of size \code{N}. +} +\details{ +The number of distinct unordered with-replacement outcomes of size \code{m} +from \code{N} units is \eqn{\binom{N+m-1}{m}}. This is much smaller than +the \eqn{N^m} ordered outcomes. The algorithm uses a nested loop to +generate all non-decreasing sequences of length \code{m} from +\eqn{\{1, \ldots, N\}}. } \examples{ -# Vector U contains the label of a population -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) -m <- 2 -# The support for fixed size without replacement sampling designs -# Under this context, there are ten (10) possibles samples -SupportWR(N, m) -# The same support, but labeled -SupportWR(N, m, ID=U) -# y is the variable of interest -y<-c(32,34,46,89,35) -# The following output is very useful when checking -# the design-unbiasedness of an estimator -SupportWR(N, m, ID=y) -} -\keyword{survey} +# All unordered outcomes: N = 3, m = 2 +SupportWR(N = 3, m = 2) +# choose(3+2-1, 2) = choose(4,2) = 6 rows +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{IkWR}}, \code{\link{nk}}, \code{\link{p.WR}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/T.SIC.rd b/man/T.SIC.rd index 66168b9..443eb94 100644 --- a/man/T.SIC.rd +++ b/man/T.SIC.rd @@ -1,74 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/T.SIC.r \name{T.SIC} -\alias{T.SIC} -\title{Computation of Population Totals for Clusters} -\description{Computes the population total of the characteristics of interest in clusters. This function is used in order to estimate totals when doing a Pure Cluster Sample.} +\alias{T.SIC} +\title{Cluster Totals for Single-Stage Cluster Sampling} \usage{ -T.SIC(y,Cluster) +T.SIC(y, Cluster) } \arguments{ -\item{y}{Vector, matrix or data frame containing the recollected information of the variables of interest for every -unit in the selected sample} -\item{Cluster}{Vector identifying the membership to the cluster of each unit in the selected sample of clusters} +\item{y}{Vector, matrix or data frame containing the values of the +variables of interest for every unit in the sample.} + +\item{Cluster}{Vector identifying the cluster (PSU) membership of each +unit in the sample.} } -\seealso{ -\code{\link{S.SI}, \link{E.SI}} +\value{ +A matrix with one row per cluster and one column per variable of interest +(plus a first column \code{Ni} with the cluster size). Row names are the +cluster labels. } -\value{The function returns a matrix of clusters totals. The columns of each matrix -correspond to the totals of the variables of interest in each cluster} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Computes the total of each variable of interest within each cluster +(Primary Sampling Unit) in a single-stage cluster sample. +} +\details{ +This function aggregates the sample data by cluster, producing the cluster- +level totals needed for estimation under single-stage cluster sampling. +The output can be passed directly to \code{\link{E.1SI}} or \code{\link{E.SI}} +treating each cluster total as an observation. } \examples{ -############ -## Example 1 -############ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y1<-c(32, 34, 46, 89, 35) -y2<-c(1,1,1,0,0) -y3<-cbind(y1,y2) -# Vector Cluster contains a indicator variable of cluster membership -Cluster <- c("C1", "C2", "C1", "C2", "C1") -Cluster -# Draws a stratified simple random sample without replacement of size n=3 -T.SIC(y1,Cluster) -T.SIC(y2,Cluster) -T.SIC(y3,Cluster) - -######################################################## -## Example 2 Sampling and estimation in Cluster smapling -######################################################## -# Uses Lucy data to draw a clusters sample according to a SI design -# Zone is the clustering variable -data(Lucy) -attach(Lucy) -summary(Zone) -# The population of clusters -UI<-c("A","B","C","D","E") -NI=length(UI) -# The sample size -nI=2 -# Draws a simple random sample of two clusters -samI<-S.SI(NI,nI) -dataI<-UI[samI] -dataI -# The information about each unit in the cluster is saved in Lucy1 and Lucy2 -data(Lucy) -Lucy1<-Lucy[which(Zone==dataI[1]),] -Lucy2<-Lucy[which(Zone==dataI[2]),] -LucyI<-rbind(Lucy1,Lucy2) -attach(LucyI) -# The clustering variable is Zone -Cluster <- as.factor(as.integer(Zone)) -# The variables of interest are: Income, Employees and Taxes -# This information is stored in a data frame called estima -estima <- data.frame(Income, Employees, Taxes) -Ty<-T.SIC(estima,Cluster) -# Estimation of the Population total -E.SI(NI,nI,Ty) +library(dplyr) +data('BigCity') +UI <- levels(as.factor(BigCity$PSU)) +NI <- length(UI) +nI <- 10 +sam <- S.SI(NI, nI) +sampleI <- UI[sam[sam != 0]] +CityI <- BigCity[BigCity$PSU \%in\% sampleI, ] +y <- data.frame(Income = CityI$Income, + Expenditure = CityI$Expenditure) +cluster <- CityI$PSU +T.SIC(y, cluster) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.1SI}}, \code{\link{E.2SI}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/VarHT.rd b/man/VarHT.rd index aa4f18c..d227e02 100644 --- a/man/VarHT.rd +++ b/man/VarHT.rd @@ -1,45 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/VarHT.r \name{VarHT} \alias{VarHT} -\title{Variance of the Horvitz-Thompson Estimator} -\description{Computes the theoretical variance of the Horvitz-Thompson estimator given a without replacement fixed sample size design} +\title{Exact Variance of the Horvitz-Thompson Estimator} \usage{ VarHT(y, N, n, p) } \arguments{ -\item{y}{Vector containing the recollected information of the characteristic of interest for every unit in the population} -\item{N}{Population size} -\item{n}{Sample size} -\item{p}{A vector containing the selection probabilities of a fixed size without replacement sampling design. The sum of the values of this vector must be one} +\item{y}{Vector of length \code{N} with the population values of the +variable of interest.} + +\item{N}{Population size. Recommended \code{N <= 15}.} + +\item{n}{Sample size.} + +\item{p}{Vector of probabilities for each possible sample in the support. +Must sum to 1.} } -\seealso{ -\code{\link{HT}, \link{Deltakl}, \link{Pikl}, \link{Pik}} +\value{ +A scalar: the exact variance of the Horvitz-Thompson estimator +\eqn{V(\hat{t}_{y,\pi})}. } -\details{The variance of the Horvitz-Thompson estimator, under a given sampling design \eqn{p}, is given by -\deqn{Var_p(\hat{t}_{y,\pi})=\sum_{k\in U}\sum_{l \in U}\Delta_{kl}\frac{y_k}{\pi_k}\frac{y_l}{\pi_l}} +\description{ +Computes the exact variance of the Horvitz-Thompson estimator of the +population total for a given fixed-size without-replacement sampling design, +using the full sampling support. } -\value{The function returns the value of the theoretical variances of the Horviz-Thompson estimator.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\details{ +The exact Horvitz-Thompson variance is: +\deqn{V(\hat{t}_{y,\pi}) = \sum_{k=1}^N \sum_{l=1}^N \Delta_{kl} +\frac{y_k}{\pi_k} \frac{y_l}{\pi_l}} +where \eqn{\Delta_{kl} = \pi_{kl} - \pi_k \pi_l}. This requires +enumerating the full support and is only feasible for small populations +(\code{N <= 15}). } \examples{ -# Without replacement sampling -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector y1 and y2 are the values of the variables of interest -y1<-c(32, 34, 46, 89, 35) -y2<-c(1,1,1,0,0) -# The population size is N=5 -N <- length(U) -# The sample size is n=2 +N <- 5 n <- 2 -# p is the probability of selection of every possible sample +y <- c(32, 34, 46, 89, 35) p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) - -# Calculates the theoretical variance of the HT estimator -VarHT(y1, N, n, p) -VarHT(y2, N, n, p) +VarHT(y, N, n, p) +} +\references{ +Horvitz, D.G. and Thompson, D.J. (1952). A generalization of sampling +without replacement from a finite universe. +\emph{Journal of the American Statistical Association}, 47, 663-685.\cr +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer. +} +\seealso{ +\code{\link{Deltakl}}, \code{\link{VarSYGHT}}, \code{\link{HT}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/Wk.rd b/man/Wk.rd index d70cc58..84b1a41 100644 --- a/man/Wk.rd +++ b/man/Wk.rd @@ -1,185 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Wk.r \name{Wk} \alias{Wk} -\title{The Calibration Weights} -\description{Computes the calibration weights (Chi-squared distance) for the estimation of the population total of several variables of interest.} +\title{GREG Generalised Weights} \usage{ -Wk(x,tx,Pik,ck,b0) +Wk(x, tx, Pik, ck, b0 = FALSE) } \arguments{ -\item{x}{Vector, matrix or data frame containing the recollected auxiliary information for every unit in the selected sample} -\item{tx}{Vector containing the populations totals of the auxiliary information} -\item{Pik}{A vector containing inclusion probabilities for each unit in the sample} -\item{ck}{A vector of weights induced by the structure of variance of the supposed model} -\item{b0}{By default FALSE. The intercept of the regression model} -} -\details{The calibration weights satisfy the following expression -\deqn{\sum_{k\in S}w_kx_k=\sum_{k\in U}x_k} -} -\value{The function returns a vector of calibrated weights.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. -} -\examples{ -############ -## Example 1 -############ -# Without replacement sampling -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector x is the auxiliary information and y is the variables of interest -x<-c(32, 34, 46, 89, 35) -y<-c(52, 60, 75, 100, 50) -# pik is some vector of inclusion probabilities in the sample -# In this case the sample size is equal to the population size -pik<-rep(1,5) -w1<-Wk(x,tx=236,pik,ck=1,b0=FALSE) -sum(x*w1) -# Draws a sample size without replacement -sam <- sample(5,2) -pik <- c (0.8,0.2,0.2,0.5,0.3) -# The auxiliary information an variable of interest in the selected smaple -x.s<-x[sam] -y.s<-y[sam] -# The vector of inclusion probabilities in the selected smaple -pik.s<-pik[sam] -# Calibration weights under some specifics model -w2<-Wk(x.s,tx=236,pik.s,ck=1,b0=FALSE) -sum(x.s*w2) +\item{x}{Vector or matrix of auxiliary variables observed in the sample.} -w3<-Wk(x.s,tx=c(5,236),pik.s,ck=1,b0=TRUE) -sum(w3) -sum(x.s*w3) +\item{tx}{Vector of known population totals of the auxiliary variables.} -w4<-Wk(x.s,tx=c(5,236),pik.s,ck=x.s,b0=TRUE) -sum(w4) -sum(x.s*w4) +\item{Pik}{Vector of first-order inclusion probabilities for each unit +in the sample.} -w5<-Wk(x.s,tx=236,pik.s,ck=x.s,b0=FALSE) -sum(x.s*w5) +\item{ck}{Vector of variance-stabilising constants. Typically \code{ck = 1} +(homoscedastic) or \code{ck = x} (heteroscedastic).} -###################################################################### -## Example 2: Linear models involving continuous auxiliary information -###################################################################### - -# Draws a simple random sample without replacement -data(Lucy) +\item{b0}{Logical. If \code{TRUE}, an intercept column is prepended to +\code{x}. Default is \code{FALSE}.} +} +\value{ +A numeric vector of length \code{n} with the GREG weight for each unit +in the sample. +} +\description{ +Computes the generalised regression (GREG) weights for each unit in the +sample. These weights incorporate both the sampling design weights and a +calibration adjustment based on known population totals of auxiliary +variables. +} +\details{ +The GREG weight for unit \eqn{k} is: +\deqn{w_k = \frac{1}{\pi_k} + \mathbf{x}_k^T +\left(\sum_s \frac{v_k \mathbf{x}_k \mathbf{x}_k^T}{\pi_k}\right)^{-1} +(\mathbf{t}_x - \hat{\mathbf{t}}_{x,\pi})} +where \eqn{v_k = 1/(\pi_k c_k)} and \eqn{c_k} is a variance-stabilising +constant. The GREG estimator is then \eqn{\hat{t}_{GREG} = \sum_s w_k y_k}. +} +\examples{ +data('Lucy') attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 +N <- nrow(Lucy) +n <- 400 +sam <- S.SI(N, n) Pik <- rep(n/N, n) -sam <- S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) - -########### common ratio model ################### - -estima<-data.frame(Income) -x <- Employees -tx <- sum(Lucy$Employees) -w <- Wk(x, tx, Pik, ck=1, b0=FALSE) -sum(x*w) -tx -# The calibration estimation -colSums(estima*w) - -########### Simple regression model without intercept ################### - -estima<-data.frame(Income, Employees) -x <- Taxes -tx <- sum(Lucy$Taxes) -w<-Wk(x,tx,Pik,ck=x,b0=FALSE) -sum(x*w) -tx -# The calibration estimation -colSums(estima*w) - -########### Multiple regression model without intercept ################### - -estima<-data.frame(Income) -x <- cbind(Employees, Taxes) -tx <- c(sum(Lucy$Employees), sum(Lucy$Taxes)) -w <- Wk(x,tx,Pik,ck=1,b0=FALSE) -sum(x[,1]*w) -sum(x[,2]*w) -tx -# The calibration estimation -colSums(estima*w) - -########### Simple regression model with intercept ################### - -estima<-data.frame(Income, Employees) -x <- Taxes -tx <- c(N,sum(Lucy$Taxes)) -w <- Wk(x,tx,Pik,ck=1,b0=TRUE) -sum(1*w) -sum(x*w) -tx -# The calibration estimation -colSums(estima*w) - -########### Multiple regression model with intercept ################### - -estima<-data.frame(Income) -x <- cbind(Employees, Taxes) -tx <- c(N, sum(Lucy$Employees), sum(Lucy$Taxes)) -w <- Wk(x,tx,Pik,ck=1,b0=TRUE) -sum(1*w) -sum(x[,1]*w) -sum(x[,2]*w) -tx -# The calibration estimation -colSums(estima*w) - -#################################################################### -## Example 3: Linear models involving discrete auxiliary information -#################################################################### - -# Draws a simple random sample without replacement -data(Lucy) -attach(Lucy) - -N <- dim(Lucy)[1] -n <- 400 -sam <- S.SI(N,n) -# The information about the units in the sample is stored in an object called data -data <- Lucy[sam,] -attach(data) -names(data) -# Vector of inclusion probabilities for units in the selected sample -Pik<-rep(n/N,n) -# The auxiliary information is discrete type -Doma<-Domains(Level) - -########### Poststratified common mean model ################### - -estima<-data.frame(Income, Employees, Taxes) -tx <- colSums(Domains(Lucy$Level)) -w <- Wk(Doma,tx,Pik,ck=1,b0=FALSE) -sum(Doma[,1]*w) -sum(Doma[,2]*w) -sum(Doma[,3]*w) -tx -# The calibration estimation -colSums(estima*w) - -########### Poststratified common ratio model ################### - -estima<-data.frame(Income, Employees) -x<-Doma*Taxes -tx <- colSums(Domains(Lucy$Level)) -w <- Wk(x,tx,Pik,ck=1,b0=FALSE) -sum(x[,1]*w) -sum(x[,2]*w) -sum(x[,3]*w) -tx -# The calibration estimation -colSums(estima*w) - +x <- as.matrix(Employees[sam]) +tx <- sum(Employees) +ck <- rep(1, n) +wk <- Wk(x, tx, Pik, ck) +# Check calibration: weighted sum of x equals tx +sum(wk * x) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{GREG.SI}}, \code{\link{E.Beta}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} \ No newline at end of file diff --git a/man/kish_allocation.Rd b/man/kish_allocation.Rd new file mode 100644 index 0000000..061890e --- /dev/null +++ b/man/kish_allocation.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kish_allocation.R +\name{kish_allocation} +\alias{kish_allocation} +\title{Kish Allocation for Stratified Sampling} +\usage{ +kish_allocation(n, N_h, I = 0.5) +} +\arguments{ +\item{n}{Integer. Total desired sample size.} + +\item{N_h}{Named numeric vector. Population sizes for each stratum \eqn{h = 1, \ldots, H}.} + +\item{I}{Non-negative numeric. Intraclass correlation coefficient (ICC) or + design effect parameter controlling the allocation: +\itemize{ + \item \code{I = 0} → Uniform allocation (equal sample per stratum). + \item \code{I = Inf} → Proportional allocation (proportional to \eqn{N_h}). + \item \code{0 < I < Inf} → Compromise between uniform and proportional. + \item Recommended value: \code{I = 0.5} (Kish, 1992). +}} +} +\value{ +A named integer vector of length \eqn{H} with the allocated sample + sizes per stratum. The values sum to approximately \code{n} (rounding may + cause a difference of ±1). +} +\description{ +Computes the optimal sample size allocation across strata using the +Kish (1992) compromise allocation method, which interpolates between +uniform and proportional allocation through a design effect parameter \code{I}. +} +\details{ +The Kish compromise allocation assigns sample sizes as: +\deqn{ + n_h = n \cdot \frac{\sqrt{I \, W_h^2 + H^{-2}}}{\sum_{h=1}^{H} \sqrt{I \, W_h^2 + H^{-2}}} +} +where \eqn{W_h = N_h / N} is the stratum weight and \eqn{H} is the number of strata. + +This formulation nests two classical allocations as limiting cases: +when \eqn{I = 0} the numerator reduces to \eqn{1/H} (uniform), and as +\eqn{I \to \infty} it is dominated by \eqn{W_h} (proportional). +} +\examples{ +N_h <- c( + Corozal = 41847, + Orange_Walk = 48175, + Belize = 57658, + Cayo = 78473, + Stann_Creek = 31347, + Toledo = 31711 +) + +# Uniform allocation (I = 0) +kish_allocation(n = 3096, N_h = N_h, I = 0) + +# Proportional allocation (I -> Inf, use a large number) +kish_allocation(n = 3096, N_h = N_h, I = 1e6) + +# Kish recommended compromise (I = 0.5) +kish_allocation(n = 3096, N_h = N_h, I = 0.5) + +} +\references{ +Kish, L. (1992). Weighting for unequal \eqn{P_i}. +\emph{Journal of Official Statistics}, 8(2), 183–200. +} +\seealso{ +\code{\link[TeachingSampling]{S.STSI}} for stratified simple random sampling, +\code{\link[TeachingSampling]{S.STPPS}} for stratified PPS sampling. +} diff --git a/man/nk.rd b/man/nk.rd index 240e823..c1926ce 100644 --- a/man/nk.rd +++ b/man/nk.rd @@ -1,31 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nk.r \name{nk} \alias{nk} -\title{Sample Selection Indicator for With Replacement Sampling Designs} -\description{The function returns a matrix of \eqn{binom(N+m-1)(m)} rows and \eqn{N} columns. Creates a matrix of values (0, if the unit does not belongs to a specified sample, 1, if the unit is selected once in the sample; 2, if the unit is selected twice in the sample, etc.) for every possible sample under fixed sample size designs with replacement} +\title{Frequency Matrix for With-Replacement Sampling} \usage{ nk(N, m) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} +\item{N}{Population size. Keep small due to combinatorial growth.} + +\item{m}{Number of draws (sample size with replacement).} } -\seealso{ -\code{\link{SupportWR}, \link{Pik}} +\value{ +An integer matrix of dimension \code{choose(N+m-1, m) x N}, where entry +\eqn{(s, k)} is the frequency of unit \eqn{k} in outcome \eqn{s}. } -\value{The function returns a matrix of \eqn{binom(N+m-1)(m)} rows and \eqn{N} columns. The \eqn{k}th column corresponds to the sample -selection indicator, of the \eqn{k}th unit, to a possible sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Constructs the frequency matrix of the with-replacement sampling support +for a population of size \code{N} and \code{m} draws. Each row corresponds +to one possible outcome and each column to one population unit, with entry +\eqn{(s, k)} equal to the number of times unit \eqn{k} was selected in +outcome \eqn{s}. +} +\details{ +Unlike \code{\link{IkWR}}, which records only whether a unit was selected, +this function records how many times each unit was selected. This is needed +for with-replacement estimators based on selection frequencies. } \examples{ -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -N <- length(U) +# Frequency matrix: N = 3 units, m = 2 draws +N <- 3 m <- 2 -# The sample membership matrix for fixed size without replacement sampling designs -nk(N,m) +nk(N, m) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{IkWR}}, \code{\link{SupportWR}}, \code{\link{p.WR}} +} +\author{ +Hugo Andres Gutierrez Rojas } -\keyword{survey} diff --git a/man/p.WR.rd b/man/p.WR.rd index fc0639d..29bc131 100644 --- a/man/p.WR.rd +++ b/man/p.WR.rd @@ -1,61 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/p.WR.r \name{p.WR} \alias{p.WR} -\title{Generalization of every with replacement sampling design} -\description{Computes the selection probability (sampling design) of each with replacement sample} +\title{Sample Probabilities under With-Replacement Sampling} \usage{ p.WR(N, m, pk) } \arguments{ -\item{N}{Population size} -\item{m}{Sample size} -\item{pk}{A vector containing selection probabilities for each unit in the population} +\item{N}{Population size.} + +\item{m}{Number of draws (sample size with replacement).} + +\item{pk}{Vector of length \code{N} with selection probabilities for each +unit. Must sum to 1.} } -\details{Every with replacement sampling design is a particular case of a multinomial -distribution. -\deqn{p(\mathbf{S}=\mathbf{s})=\frac{m!}{n_1!n_2!\cdots n_N!}\prod_{i=1}^N p_k^{n_k}} -where \eqn{n_k} is the number of times that the \eqn{k}-th unit is selected in a sample. +\value{ +A numeric vector of length \code{choose(N+m-1, m)} with the probability +of each distinct unordered outcome in the with-replacement support. } -\value{The function returns a vector of selection probabilities for every with-replacement sample.} -\author{Hugo Andres Gutierrez Rojas \email{hagutierrezro@gmail.com}} -\references{ -Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), \emph{Model Assisted Survey Sampling}. Springer.\cr -Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas y estimacion de parametros}. -Editorial Universidad Santo Tomas. +\description{ +Computes the probability of each possible outcome in the with-replacement +sampling support, given unit selection probabilities \code{pk}. +} +\details{ +For each distinct unordered outcome (multiset) in the support enumerated +by \code{\link{nk}}, the probability is computed as a multinomial +probability: +\deqn{p(s) = \frac{m!}{\prod_k n_k!} \prod_k p_k^{n_k}} +where \eqn{n_k} is the number of times unit \eqn{k} appears in outcome +\eqn{s} and \eqn{p_k} is the selection probability of unit \eqn{k}. } \examples{ -############ -## Example 1 -############ -# With replacement simple random sampling -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector pk is the sel?ection probability of the units in the finite population -pk <- c(0.2, 0.2, 0.2, 0.2, 0.2) -sum(pk) -N <- length(pk) -m <- 3 -# The smapling design -p <- p.WR(N, m, pk) -p -sum(p) - -############ -## Example 2 -############ -# With replacement PPS random sampling -# Vector U contains the label of a population of size N=5 -U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -# Vector x is the auxiliary information and y is the variables of interest -x<-c(32, 34, 46, 89, 35) -y<-c(52, 60, 75, 100, 50) -# Vector pk is the sel?ection probability of the units in the finite population -pk <- x/sum(x) -sum(pk) -N <- length(pk) -m <- 3 -# The smapling design -p <- p.WR(N, m, pk) -p -sum(p) -} -\keyword{survey} \ No newline at end of file +# N = 3 units, m = 2 draws, equal probabilities +N <- 3 +m <- 2 +pk <- c(1/3, 1/3, 1/3) +p <- p.WR(N, m, pk) +sum(p) # must equal 1 +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{nk}}, \code{\link{SupportWR}}, \code{\link{S.PPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} From c1b52fa3ccaff8a695b04426f8b54a1580f95d03 Mon Sep 17 00:00:00 2001 From: YURYVOM <112267599+YURYVOM@users.noreply.github.com> Date: Tue, 16 Jun 2026 15:17:33 -0500 Subject: [PATCH 3/3] docs: restore original examples and add Roxygen2 documentation to all functions --- R.zip | Bin 0 -> 73368 bytes R/Deltakl.r | 8 +- R/Domains.r | 48 +++++-- R/E.1SI.R | 6 +- R/E.2SI.r | 133 ++++++++++++++++---- R/E.Beta.r | 84 ++++++++++++- R/E.Quantile.r | 36 ++++-- R/E.SI.r | 88 ++++++++++++- R/E.STPPS.r | 110 ++++++++++------ R/E.STSI.r | 117 +++++++++++------ R/E.STpiPS.R | 125 +++++++++++-------- R/GREG.SI.r | 157 +++++++++++++++++++++-- R/HH.r | 151 +++++++++++++++++++--- R/HT.r | 277 +++++++++++++++++++++++++++++++++++++++-- R/IPFP.r | 36 +++++- R/Ik.r | 8 +- R/IkRS.r | 9 +- R/IkWR.r | 7 +- R/OrderWR.r | 18 +-- R/PikHol.r | 35 +++++- R/PikPPS.r | 51 ++++++-- R/Pikl.r | 9 +- R/S.BE.r | 40 +++--- R/S.PO.r | 37 ++++-- R/S.PPS.r | 35 ++++-- R/S.SI.r | 28 ++++- R/S.STPPS.r | 53 ++++++-- R/S.STSI.r | 25 +++- R/S.STpiPS.R | 31 +++-- R/S.SY.r | 33 ++++- R/S.WR.r | 31 ++++- R/S.piPS.r | 80 +++++++++--- R/Support.r | 14 ++- R/SupportRS.r | 9 +- R/SupportWR.r | 11 +- R/T.SIC.r | 40 ++++-- R/VarHT.r | 10 +- R/VarSYGHT.R | 2 +- R/Wk.r | 165 ++++++++++++++++++++++-- R/nk.r | 5 +- R/p.WR.r | 40 +++++- man/Deltakl.rd | 8 +- man/Domains.rd | 48 +++++-- man/E.2SI.rd | 133 ++++++++++++++++---- man/E.Beta.rd | 84 ++++++++++++- man/E.Quantile.rd | 36 ++++-- man/E.SI.rd | 88 ++++++++++++- man/E.STPPS.Rd | 57 +++++++++ man/E.STSI.Rd | 70 +++++++++++ man/E.STpiPS.Rd | 61 +++++++++ man/GREG.SI.rd | 157 +++++++++++++++++++++-- man/HH.Rd | 145 +++++++++++++++++++++ man/HT.rd | 277 +++++++++++++++++++++++++++++++++++++++-- man/IPFP.rd | 34 ++++- man/Ik.rd | 8 +- man/IkRS.rd | 9 +- man/IkWR.rd | 7 +- man/OrderWR.rd | 18 +-- man/PikHol.rd | 35 +++++- man/PikPPS.rd | 51 ++++++-- man/Pikl.rd | 9 +- man/S.BE.rd | 40 +++--- man/S.PO.rd | 37 ++++-- man/S.PPS.rd | 35 ++++-- man/S.SI.rd | 28 ++++- man/S.STPPS.rd | 53 ++++++-- man/S.STSI.rd | 25 +++- man/S.STpiPS.Rd | 31 +++-- man/S.SY.rd | 33 ++++- man/S.WR.rd | 31 ++++- man/S.piPS.Rd | 63 ++++++++++ man/Support.rd | 14 ++- man/SupportRS.rd | 9 +- man/SupportWR.rd | 11 +- man/T.SIC.rd | 40 ++++-- man/VarHT.rd | 10 +- man/Wk.rd | 165 ++++++++++++++++++++++-- man/kish_allocation.Rd | 29 +++-- man/nk.rd | 5 +- man/p.WR.rd | 40 +++++- 80 files changed, 3607 insertions(+), 629 deletions(-) create mode 100644 R.zip create mode 100644 man/E.STPPS.Rd create mode 100644 man/E.STSI.Rd create mode 100644 man/E.STpiPS.Rd create mode 100644 man/HH.Rd create mode 100644 man/S.piPS.Rd diff --git a/R.zip b/R.zip new file mode 100644 index 0000000000000000000000000000000000000000..dac36ab2e320a4f903ba1a1271b77c4cb4254dfb GIT binary patch literal 73368 zcmZ^~V~}NGwzZqKZQHhO+qP}HGP4quwr$(CZCBdP{HpqN-`jnn&yJWYcI=3?V*S|h z%rWL1uYxo%2nxX8kBQQZ&Og5V^8y4w0H8$g>H!S^2=e!*e_T{mU;)4{&>5@V}fBoSe)57=1U|OZgK>Dy~h?_lIXq*qPQONZg+T# zE);tB+T|U zu`*{O!ulHtYj_qK&o+@Uai%cPt3+;L;2CUQ$G#{b`U{**k|9a|h7nl@_OnZi(2NY} zk!38wsZUCf0YQ)}gUY7IJvV8*mwxw2u^M@K3Rz=s*Z?ldzo`wmQW@=YPXotkj75%R zvOjn&0`HJMMs?0QzwVF~V|*y9Nwvk6@9zQOsX6ws)R?oqI_C@97B8(*6{$EacwG-9 z?A}DUCb9^fK&AOrQQAD*9>_$BC1LdYBxs^N$@@|% zpSc#Q^tPsXG6t!mUUGt?1_`JQPk$pZH7Rmnqsb zRr7jUG|;iZl0uWy{);xq++;1-FvHqF@X^oI0rLF?77A&a=O}J(@B;RZesOt79;I`g|few)sXx;|+r5r7yk+3jp>iR}Qtv{XMqOGzEf4?I|ZWn$LoZvNxAXgPWLc^m2*G4(CjcoYXYLQV0)k}Ayu zvUy@wgad64Noda1iD^6p^joSDd{V-6naX4<{d(h2UZ`znzMTm_`M*edpAD>PC^ zwkX{#^%8~+s(yPD0u_Yur_uE;Z>D3E8JS+EYIm$y6VhUeOp=yqWnpXPWpxpYk5FGF zD;L&@FY9h2lS?$sqJ#P88s-PgwZ z*(dyP+XsMica$ZAiXP9oDhszcFa=qZB8^ZWN%`i|cCy200oUwrQ9b){1ojigC_>fw zJBdxv!r%G} zh8Q(nd2DeMU-5l@oj&2^rFx)sbOohQuoglpMOEZSKo5-T*h|O`=Br7XC$QI^=gl-t ze}M=r#GCCk@2j>Gzi9ZKS?{(??DOYmxSbn+uj`~5(-#j*tUMOoGpVG0Z0VJL?WX7& z)%hf~sV*ii3_cCYP8Sl3%EnsUvL`xT>7e7BsmRE-WGd&#A08o5 zwhTZ?TK<%9ztjKx#e1j%dy1O{V2MNu0S;dgz?$4Nw-9HKM4zd)+c$=rDCp3k0H@^} z!DKn-VA$(O?AcomI;%5>z6h<_apS-&jFhOBqN68Q>gqJV%p%CDta8>F#KLh6nwsIM z4Gul(*(W*tfK3_axxfYg)KyRklx{tuY`yNO5OQX#05uzz!4{=Zhfy>Ov((t6ey;^6 zy^J(;E1tB#^RZX1N+%SQFAg~?D+*nJQuks<$)nc;I)4p#RR z%I`t^JW1r*x1eWAeed*AM#ejlHH0q*D3?xZ;R8$XxIZe4t8uA-#*?Bx62kvh8Ou_7 zhy{^99sD!bMM^#=C8qiKNW7sw&61jYR5yhw*`1`aS|n>Byuc7~-K>ghPaO7FijTk(!PMT|C{nU4*yE5c{-2br7Xpr8!|DY$3PiVVOe& zUkwV2N^t?b3XkwtIfw=ntgn4;!A8QcWTYCaCDwKC_nxIjbhR2rsd+PYvmMV7( zj@_p;7}hZQ>0uK4erk~?Rm_v2Fov%CAv%~WoM2p-%ZbaZq&otXbMBO&gZ}&mu@F_A zVLcP{^&xl9?bQ$cjQc(V>!}VKPK}eN0vkE~iQ^yA(w{TU7o|vgA*7~5Kq*0>?hA#} zhs8&JLrPS21JqZyAVBbf1=PP5%Pei&#l|t1=TWx{cG$xVJ|>L}_{Hxp$9!w8n!lWU zaPSb)@ri>koU6glscw?{n53iyqGiv8iQ*5Y6>gbaX0dhQxtHTG{f7h!G+be^oplqB z9VSoEs$W0ZJo#ew2JE)IVqKQKtsho~Si@0#Gi+$&U7f!@v^>r1L8I|7&{$3je(Pg@erRET+WlUqX(jE)(r?jK zM0voC+?=)>A$5=hdTK z62klv+XP+r^sFS#s8XY{40~$n|FhQD(*6C00!h`{*;EVS*CbcEby*2s7ru>vxpwDF z!$GFQ5{psff)>Z_w|#Td$uk2M6nijGfTVeSG(SK_(rqqR7@k$ z%TU(y0vAT6i|Wl(Wg@G)5_K{9WEIGJenvA?vht}2656!U;<*T%RG^!9zZC!5t-se zh9xBXVyUkBuV9<^)-=0GcX3bJ27T-N15NrljMRA>loVpOdU3_pGXavbS73qh?O@jpAxA_vRRVT?-*mjUBIP zgsOcJk@aBg0*wW4PNR%UjbN*yb?$%LXf-%13g}77BexSQqjmev77ElSNE`J+%&HUO zeJCK6p{?8eIFwAr=;umm3;^IBQi7vyk0w#83%!!|Ti!FS4}m!}k`)@+)>>S_n3iJm z3)nWwy6pv5yfbQ2wI$whalyu@0}xRKV8C>;BbcHx9H}v*jAZIT9p9%RyAWpZ3GTIu zJ>geIlhyCxi$->%{B7p?t(lTJBOBJUIY?@RDwp- zOHxV0iQokKBstKZbTL@su7`Nnv3CaHTPc>eOlKtgV73LkqNJ8?xR&W z(2`A_wiA)FqQH?uE`@tc6}qfF zFdE890zC3W=lf$dbOrKDmk75=rf?G?K&)>Ow$v>35QfdV?k6EPyl>%#9R$biYn3yl z_oKqIxiAW0;y_9NcpQ2d50^PcwtqyiZNkNR;xgC*2i@E&jyVrL4f6Va7^nGK=yC`iXkVP4o*sfaM#?e-!Qa(?J}G460?%yaSw&IC(AD(7KXj<{dT90YADUu7|2s_&{tucm{h{f9wWrvBNcy+_()3c( z)*hQ3>FbX@H6-vqEpJIKNO~X)Qyz5v^#g82RAG%TR}yNLdfcdCVk<$1I%@CRkD26I zcB#0lSE45qP;hgvlLv<{b8lRHr%(E;Y&6O9?eLWQtyh{@X8h8jI%YH#u`R?0)@)$U zR_|9?qRw$xk-|8txN}WZn4FMK*I0EOaw!xOTTpaQwO?V@ z=dZHWS0S%HK5c?!V&9vfGpeW^7DgFrHfFwQwtkhgK$i#<;R4W!GR3cLRdPf zY$eNM-i97zeD~h(AXk{%r{ZX#)Exr>eh&CLPYi^hys4e$ z(wQCMQ4L{{DjVU_AohZzAuXqELQE#2)$yaLH<_gnYD9=Rd`;Hfdu9`DJYrCq>W`gU zLh}SR{ZuT8gm$j!$J+alXC#yhS|k#m=~v6zu;oES<;)!>Q0@evv4Qi5AO|Ni#5px7 zl_JNK9?ILt4s`0N=r87$AXlj0F#IPyjIid|ZrYJ5aFe~ZVP_bIzwk2?gD8^)J1x|~ zVn_;h^?Ae#n!9v3NWV~<>jJDd#p9XGILb1hY=h1B#bJYa;H&78U)7k>B=ry>Wd%Bo0Km;f`IE3JGzt)*onGGxpt&#jYmOauRqpx0pW7q0|PjR`L%oW03 z%pq2;z`(bePR5&dT&61-=5d$a^rhe zhO#wi<|C2}9QYx@__xl1IqENI@R{b7I_4vBPM+3meM|Yc)vlA4qE5F)M&?DVX&zHW zyi)C=e%jr&GHp$@Fn~1T6QT(8LTA7VU9^&`-SeV%dz95Y_>L72@uzL#^a5KPo z!cvDAVg>QIoSABFZZi7U&Deow#fRMENi!I z@ewnqI31L-W;YJAHR6ObL7C#V?}Jz~=C?>fM6w2{KQ@_7wHm>`;lWc|sz@^Gs7RA1 zxy%Czo}(k3_w@8NV7HsC2W?Fdt%W)+?x9{*9iYaLv0C!h3=bYTuEwq1QxJJBCU|el zNbsHA6oT7z^i+lpuD7rBd;v7Xy1G@`)yjdQnV%Z8)kv1S#1&?>0vG)VLKxes_>oZ} z6hP`bRjLcdk>CBJYb2?bi0cnHov?!v<{*OLbq}D#9ZUK zvY_T;C% z!@TTH52y9vz5c4*q;r)z5?s4?XPd;_-#U>b)Usx~v4H)2DZ}@mnA^5+L5$0~upt`^ zcX)vz`w{U%x~g8f+beL9*lK;Rf8TMd;h@xRt*)nw@XF(N7z7*N;)pOO?2%1RhC9;Q zj?l)|M|?V`R7ny_ysY(EWx}+U$F~uARrzX8R@8{qEy*)YPJ+*Rg16d?1m8QgdQkR* zdQ0+C2L(vvCF(#;J85f8*?aou_w-H76^zS8d5kMR-aNKOTK;>+{m|RoLOFBCy}4sqf2on<0aF!;HyvN|=Md3G?vyS+>O2 zEQ0v%gngFJ6S4?7#1KjLd!PjN43?C!r4;$3F zyR~)+T8X+=M2zh#&wuA#8ZF}zy+eU!LQYDHiHYc%YJ!zxhY$sZ=0Up|ND1O2aS15WFxM}zITc1Cc%{Uz1`vx(G+II75I-K__@TXim(5I78 z7ENfPR$z9pHj&SQsxHVxl3RX&){(_7Ow7EiqC{$BJyoF=NGXo3naR*e*Pc#_>gB9g zCA2MGk@s4UR>BezQJBlR%UZcC+sXtP&oWtwtus(jB6L#k0un4xl#L|fe4vW`ULmP* zEb2V{QPeQ877c@Q>Oi84G``@d1b9P!Gma{zFa4D%p>ARr5N!K;$ojCrW{eNqoU<-A z!`oPp0;ff2+*v*yWH6KmidfV;P@7yCPJGkVYVf91c#%(^dQWg|_=e2MOZ(aI?^bKr zcwcIzH@VYEL}){&RE&Ztwbo0W(Q3I@eWEOV`R0D+nESR#lCsgdZl)R1gzn{N9HYQn zZ;c$X9Dd|g@*P$J93}oz@gS8>N36=iVKqs| zNkhCqfb{;Q`V~|7g>x4C$ws5BOnb7gQcILbtM)ulTuDWZEPQ(huJ4;~dTpUkvP z(yEgh_(CK>(V(g>7q91RPGlDCBuD6fhlbUSOX9y&xFvXn(ck?1(%0b7j1o0x!U_o% zm8>sJqx*&-NMgtj0jrkOvQ4nlgJegS>)pI*R4ywl-aj8y!4p+97O%WCWbba~@9D`YkT`Xk~Wi~ujLV?V@mRH(&SVN|g{;2|F-giT<`>8bhWeqiVdV0zr z*AF{fuhp%m)C@GdEHyDSV|5=f_2#Hk#83f4J6qV2cI$ZVM&=OTpDF+!K2X4633xV) z+3o->nKg|lWfjF-diEsoL#iR>&&(_YDc$XmX*sS_&&fqhdz^2KR5qZH@vEBTadKP- zL0U;~p+gU9BXO80s^4wFPx=F2&-`r7q)DpTCeh`|v}t{Cui@ImU;r5H@>AD*O78-; zpVKRwxEW*sMea#(?#Eg~sM_TK18yjIdRW+VCSN+)Hom>*={0PPKX>S!ZUwLhJhQ}S zgt$J+7G#Jr1TO8GJ63xA5$vaDi&W|BRo56dMq+MBI8G}+Jk7GDxEYOPv^bB zA*==;+Y?TH-|+&YghA~ms{wS^?~ic&!@zLD(-sC?(LW2Oo#)d)`-4-D0m4PN-E@ME zsxjxp-T_^o-hGg3){Zgk7`Py#9^pp4>wyT^jL~nn0|Ym}7(N>ooV!5$VbKs2{8q9T z)s1tYrGDCu&OUZq-*Wi_C!Twj;$K0W(G;GAaK((*r)k>CKUJUykQ^b7cz16M9;sdn z1RcZ+YKv1Ct_Q`v;*zctzenp%*34b$$aG?dAt+%_<65n`Tnssf&a?i*)nf-+U*vT z)K;s6nk(iIW*jqob1JcCL4q6^%Bp}$X>~f$If*bW5?_w<79>`W;r){`U1TBwm$NT^ zd}2HwkXx`-kTY9DhEUj$CRLM^0w(uo6-)T?`K%;yqca;3G9a<0#1Dn?`f(m3_z4e< z^#LY8v`qGv4-VlmblO&j^XCLOc-39+26woTr6YK zLy-#aco0Q=DgOIa*n#-r2Kcj4TTykpGzB8L2tzT9C*HM|ahiq9vYk$>{SaQM2G&Bci%MjwGKI{#W3$&)L^4rZ z&>aA>R_078{d0uw-W!{uvzXQ8?e|^56*~De`6l(%WwM20by!2mlbtf6R}axtNm_-^ zFpOz@-S72G&7@wImxh9SiN!6f3H*-GC6vvxhs*?tyY?aG&3$qEsycufD znhc44dQUOQ&;MQ}4J(S`HwT%V+;6ISwcbSq(ZWIqvPU0}F_yt6lSAKP@b-Gsx})2? zQW80gc$BDi#0y$K=+L-e%Z>GHU%G;+I(X9u{Usd3gwFD%E=$&4=_;W+ zz@Yn{)IB`(yOU7jQ)vNx@p59&y%}}S=%olY{LsbT{n?fp4z7&e!str_30*m)Gk@P& z<^4txiw0!^f=@bBzn|zLIV4g-E5)!;3o~k4mK0S}iPfL_kkx9PFGxWN5H+(xCsoV} zCVx~WYg)RWzw6Uw`E&8gUr$LleZM-fs$_q&ZVTA!Rrd>2efF_BuK|9fULcP*Cjxq? zX}lVkyo!cOH4SXdRFJ&R{%!{|$sHWOsdZ1Rqlk3B8F)+fpb8ULeJuYdtY7Q5mUSR< zc5eqRpL3VTY@5@3P|^41mMd^n{{UYSAy;Ic1*puXI5spmqY1($Cw2Qzhv`j%0GLFJ z1`Hy|x|GGn_r)7{D@uV3W@1cYJ;9UQXe$A{{fc8GjdMkmLB0mO9a%nA>KRC_Sd?*m zIcjG4Lw9bTwo5FHhc*K5=sf9&C7`)M2fAYmpGr>Pd>rvCj>FuIW8`g8Qn;^u&rBJ3 zu3Lxa-lkyG%J*DLxovf0YXZwtH4pqi)^DU`utJBQwmP!Q;9M(iOH@6SksFNcNbj_k zo4xPUP5Q502>c1_uv9#lx5AiT-t!aSd&>3i&xKEVjq;Z@Q`J50E$89jki5Hb+v@(B z9p60{_xf|kz?HL?-}A$G4x9QxKcmt{a_Sl881)ut9jGh3$2Hu~r|eS2^~i zdjk1lK|K{tt27Z+!Jm}aqzh`btq#m*tkW8Lb0Gxy?G9npZ5V5(K`QrXA2+|l-)4#( z7@-$tK}jmPLn2uR($rOyb4)UuJ+vU`8dJo{zqh<~Ysr4ks!WUh5PyJ+F2_=;8OV9g zgTf0BEI-$I;Mz8!bu_$&8oP-7p%UeIC?@Fa5{M^Y?w%r)k=88u-L6OG{Pl}+M-!Fq z#!=uQwofsat(w)FT6W;5cq}6cGVrGSh|6e3G{k>4`}J&lGyy?{q#^z*ot>XFR4tCNRVOk%^%QYWkeD^m5k8CbXW z5yB5bh{vXE#F zV6ZKq(i<*EYk&HRfKwH3O6faeB}Mfa7dx}xW{lr``#s=;-`4#Fcb+ml>iD#UO;sKE zUiF62+HbH|;o|#>TR{&y55ASQWJ4l06|_bYuElg?VcOPb-fH&)eLujp4hAJ4_!B-i zc-S&iM4f;!X3bKTqh;`TyM?r6YTee_m)6556~zw=yFwade>;OZ5%bI@B^_llWZ#v7 z7W=aGhxy*4p1YKT+|$4+NkT_Gf&3R4)%d!e>to-7>~gT>mWOUl46X_@il2VA1S490 zxjmCT<8v8BS}t*{g*>!UDoIDhACw7;;58CQkpVz8$ z1e%#v+(Fa6b~}Cym89L~9m82mcpee%G@?b1aq-H z9h%M%!Y2nhCFA|YqEaWHj%Zhl576px>|8}T#5ag3bt)R)*@{d)k5-GZx9JmJuP=bc zbf=S?mxbAu#@C)*zMpKEverM~o7Y_Jgg4!UD7$7(VG0ZSc2((ZAsxYKs(XcuJ(!D; ze4mnGKPqw6#TZ7>Rfdq3FM*S!jcc*nN4K%UFAmbgh*@o3-HRB(O3b0CR6z%H)0Nt~ zmO0E+ITo~@vEHunO44|KWypF37>Oj>&pEQaIybD{-cqsfm!r^=j`*{@zO|wQ9Px@J zLm&l0do7ee*>B*fK|VP#K>SFe8O~*~w$mKC1t=H?Vb&Vrh}Lzm@t3mnKxVjILc`u_ z>>}P5AN{uPv z9M*B~C1VTWLulM$LX=cc9G-KXB;)1eBPuv-&miFbs^Jk*!i(Gw_nH353hN5h`*DvZ zmA(z2$Ry!7&l;UqbFa;g`FVDPO=OswaF;*+CL#<{Jid)I9 zf3?luL$D;#HI$p26SOjjYI>dl%@@_dkMrZ}(-N5KV+Lbx3{th!eaO)u)}}{P$4WpH zA9EFrgf=D=j-u77Xa6Dw?NQq2zr07~Oq-aTpyZ|MfzTM|X8MWcrm4pntFGltf2CUe zQI5Q0R8(W*BbF0atSS!}U{+jjGF3pFS7OpwIC-cF2y^&GP>J;1c1TWn8J~840%(%o zNwe6X6OKK^Ez885bWVGjabFBJ`Yg`JRnr>FTaVx-1#BI&g!9gKl0w40r=Qb=;h?G>*#4;+S zU944hK|!x{Gjw95YuI{nLBVAai1$Y6wCkdhnF6AIJWBOQCgC&sdZAM@wA)^Xy^MHnii+lF`&}}ZVnBxZp>$8|tk67PM z>YmWlCpWikiR)67Q{7jP*i_kdrqf5ZqbdI3Z__+mE*3gkxOq3SG*~}+RH?Ii zVc&pkw+{2mMzFzC4Vh`3JP&0_P>&}ytmr@|CG2EE-{h#55Y;VnOk`kN14S=uoP79V zC=n`~z8bAO7&%$cdK9cgXH%#>w^B!?C>&k9aQmQ-lygV(hZD2Jt1+-FrI_K2sbys5 zK9&)mR_lZU{&E}FML3{6-l)V{5~#3;t5lZcomiQM3)>d#78Wf4^^a(=1CT{{0DCp1 zc8lop`s_sMFv^X~v0LYXk(_Pi=jP)GjlMEhAz|_yRx$yj#r?obd&HASa6Low2CEaa z2gcEWf@+~fF)xo^p;(3rm*Mj&HVb4lRzec zl|8-tGrum<{kUycj?q_`QlHy(G1{(|$ptKOUQ`$1EwL+Fu#ctvC+|!kXu?huBKa_z zU&B<@!mV>i+T9GrRzY9LFYt;2e;HuCK7JHg%u8_guy}yQ$x=waB<7mk-x%D}|7BVU z8XuJ3AEwFt$3%toe=+S}$sUFOO9D3k0ou|32em%C|B-biq+;gr0v8U^UgL|ZIftnbX8EBnisRDXUJVF_w&Cc*^{VTs%#4*%QMDT zJ%wmpDLO4VRA9)f?bb_!d$Xyq8!vO3zorNA3zhXw*ex?`*IN_SAe9cbn%dctBBk}5 z895?^=M)+ES+*FNdj@ELcfO8F@IqsW0<+RK1oMBp+V~oI%Ixk7CjPiBxamJzn?wm~5~@@o(_tcRLUSl>yW$OfwEOtt z?cI*d1;NoBEW`t`^dPnkyh`WMXn)IthP3NSODUB>HMxKN78?Xf|9$Pos$1F`W$h)r zW})$1oLALTHJLH@W-U8mZ?@{ge6F*5g=QfKWkgI|%Oj|sJ7Rhzr&5xY$#%~7o1&}z z08paK)cZ5QaN|wu$F2{R3vkhTE5O2n+tul6GD47_eSx6!1N__Jms+46{i2WYU93u> zXUrRTXobHlLChUg(k!S1SB^axa%)jkl-bQ4Pd@YTodk21wrZ0^velpeb=pELcPWOg z{F~alDLHU)ZWEZgsMbm=0?Z7XUHtGnK)le(XRNyO0;2I^$uAZO3Q3hY$p}rPwYGP! zT3`Rp?7&1?7ppH3&q=4G1xbINxF76o3v3|f&Tq7sbdJW>p`ZA`EZ?W6wJGjXWf?=i ziHGa+ja3j@D<#kEAMf^Dwcsu0J!l&7WuWOD;ityVTm(h{czHNDejniy&_#{#reQx+ zEvg9bpEeE5Vn>%6{|!?XAti6MqR~Kh5~am%gPP!{fYoDm{&CB}a3IQ5wJK=OhYb~~ ztMOzORpqOm28A~PsD>hxz_?YtY{>{^7Qa0)w5WP4jzCp z_b!5mCarZ28ef14se5=p=I@_1%1=U10hyYhjK|i+e;srtMKy z$Ivz?%ITPx2k&#cBn%i~)_yUy?-P5Qd)vt72wW`aPEN-<*T3)u>`7PNIQEvau!7_1 zK_YZQN*j6i6K-tyfj&8{dwfkkAmOneU7x)~pk>bbF3Q_d0N-FBR0gE(%z@a4TInoWeJ z%_|57=6l;3fwAZ^)FJ!EoFldX!-nOa9{sr0;7ZK`RAd6?uLw6!z?9caX$2S3h4?W@ zD8YPqV}hD>Xpz8Ugo3wvD8rm00jMcma@Z3fw|g?P{QW1}4GsaD5BWRr57(&wgKIdh z|2Zj!`{$%s(bdq-#nQ&~zbm%UVn!>ZfBM@{HGk&?F@(axF)Dfw)2RCDLSn%bC0X;N z$y4P6A*KK9xU?MW+NCK{&TIZiu)zWu;X!eF!qgp~)8H@& zzyY&;C;g#BXkVDR9C|$^q*Sb9y3}gN&0FR=&JOwM!t5f$s)e0?#9+{iQ^OG^lx0kQ z36u~zYQ3h?0^4wg)KP4hJZm0zsccgrdetT=DqsgzoY2f-n2^3Msyh|8aqeYxDq@Z{ z`4tUR*mM{5#qtd$K4Wgwj;JY|pKl4+jWp%O zz~9X_!n+kseCdL2ltZh9xZWJ+c0_qF+|ZNAnV-kOub-h7CbMmcmCxS)M!9Dg+s^pN zbw2ov4qbL}WpEfUlYv8&CX;B^*gm$m9)1#lVN2?=S^mb|9$7De;*osIl|$-We*6%= z<0muWHr+MRU6`3Fyyd8=IF`%Z2PIQ}YBK znUlO%v|D-ZjQKLep`w;<{y+?eX{!eYo)+X<|mvHmj}+m0d;G;JA_b4#Ux$;YC@DwhWw*bMcQ2e-$BpGJt4AJ+iO}3H^+SgpI#-vGcLZ*aG{F*Ocx<$-4NVLhVDLNrVsCu z(IuLIOP>CROIAXPd9@#2Sd8iD^JY=2B`fU@&2&OFME1g4F+VC~FfX(G+=5V9V`mt9 zPC15VT1{rK^}XuxfdZ~sc0yNzPv_5HUt&=3@4C!4T!iyDm~F)__GeZtLix%S69*Ff zbG5RB$LO+-%jmv6Y2T^0)4~TDNZG!;mL|>I1kIY&USG0tg~7a`cR=2Z z$oIBk)t%h=$nCtELU5&VznV^rn%@TSia9{8!fORg3(FC;R=ZbliK%8`GqT@mLHjNc zIa~zxdz&0SmKuHH|K&L9$V#IPC;&j!e`F;+|Cc`fm*@V!qLS=?=>50;MkTj2bnQ2W zk$nH|`yBA7P@`>Lk4Xh}|0E>`MGw&85B9fLX_pVBw$nFN-6bcY^S=FNF6j78sO6%X zPsQU1)-|Um_tKafNbr3HXO15GSMTi~aR-_QW#)#IXJ$-sw$&{$QA z1EwaKngSR53dvPecacxYn2x0v5f73UkQR~EK*>UrbR(%nI+K9%6-iqL{>hIyl}l-s z!ru+oP*k3=u%>mMVd(K%pC+$mWnkMnWF-p_+K3@r$LKn{PbD8#h1LBOOmx zM=#>3T#;#Q)WCuS@S~+5`V=E$4T~5G%Zh_DrK~g)gxcmXBan(=W5}SH5ZyJwTILeHy0)^6DK5 z8lVLA`DKL&Al~(N?lfX-==A|iSLSk43KV}T8ju`vmm3eJyDd&#sY`{~kZ#B@{JG`O z-nR`z*kSpcE4akjta=Td&CWnK=XG~I-<#@-q{~M4LjmpovbRwdb$NMXYdZ zP|83G$-^WC;g>3BO$-Co?DNAy=f z0p@Bnl9i9;IoXu}jpFH88J?tqBbzvR z=spr~jJpQHJt(G>^FO9{W5gepjK@_FC$C4$x69YwOw7q~kQG>xC#QEQ<^~{AHdHMo zn6iEMGtI#d;AgR1|I83H!}kSPYoRlLo%A?_mk`Yu8W+-<$PHU;53q8c3!iMo=@tC0 zZh>=PbC0RL!^E{>X_2u2Vku5$%9I<~Z!g0DqG)jwK95_$;>d+lx`<(%A*I89od9!p zA|o^m;ED7h>uaGN^W9(jO!d%9=V6~Pi*h%@u0j-Ez<=6#sgqJbHdDxQ#t6ev%{P!m`Z(giH;-!AGAd&AO-TIyxa5pfhC(j$ z9AB+m#Av|2r-T;|#;;2w>U|&U)OWk!?zaeqFFKJj&;EEGd!dQk>$ZX64abt$bF3M- z(i9$NM`{TGZlAv-WVf~s1RRfRq*NV$h)C9;q0d3P;ehn=sbIXJ)t;$|z;lGoZ|JvRPhkFl(-0YC)*0(ophECLkU zu6DVuG{+{M+~4R2i~IO}BfTwGrXRjfd(@tV+tTZ1y45Xqd!@2x!!v)U+Q*#Hxk_{A zu2ZwPrgQKK6T8<{N5@5G2GhP9kMyaoNryUrPLgHyh74mk)I!kpryTk7x_>*u(A=}P zZeo4##>VP~Kd!fI&DYSwP@CJ2?r*tb?6LK!)p6r|&*=fw5cfkb33hmV1U;z@@d8F|ZR~9h??h6Y z5O(zM@}U$PT-!QUAF@F%9|jDZOX6Tw>GcBg7toNGp&Cnvhk@`HWbKF}-*CJEgNk>R z=A_x>w-x$koJ^S){x5*0qXB}4O9ohxZN1V7Jen@w#r=8O>bf4L4w7!CPFQrkd*zGW zgfcw-^EmL~i1GA~BZ|9Rzns}4Oy5X^fm-kWja6Kn&qnUSVFMWrg7-LekNyb0fy=;H zI&gBP!=1H0(kpdsI0VyH-DSqZjL$QBm@$$%=zW_8?^>^HAR~EC$i3z>q~UlEd3=*2 z>LdOnu^@atjsIzlD9xYuLx0wH@@W6Qz6197Pm2TdPm80h@{a}He?6Ba{--tmt-tNj zR&D3+L3cm;**^!}`BZ+RaJxOv=i{aq*)8O;22oDoCBPVvtnU*=Vo8!J0)Jn>$NnD^ZIxD6PKGfk> zRMI|~SNIJRLQU#ywWujoR+_G~bPkXzoq@Sjn}VKsT5g%uhTZ#Ooas#h})(yrHj-&l^bX&un3!*hw7%7#tR#o5f%&NwCN> zD-yn5TcGQFQFhp@W^36IS2**xO|DR^L-;r4Z(^D*>On6AOxgaqcBJx5llC{-0G9rU zD6Vz^9ovLF|K6!k=Yraev-SGDHkwKcd5pn0fb zB@@tL)wXPk*%9PR=w$Dgvw&)+xC%8~`23b5Kzr(P6IbFGT9pCF=w5jNIOh%_R9CVp z{*rzJi183PICNzGy0flIS~?w}9`HVWj7O>6kCBl4wG8%q4oGC`ieitCu;XMGo2q&q zB1`Kl6L!6asgU{MUE%aGRkO~W)#?yAau=A{*SWb2jx#{?Zl*T1Qxm!}E5WvStb&5d zO4f-uK50>kymNKoVIAS~$i!)jAE!Tb0(nC-KD*T138apIG^4N<+^VS!-An6~qUL}5 z8PvpmUm9~s&Um8Qu#IK=FnVgGXP4-+WzZJ;+`vMlb?06;>)Y#ZR6-Agi8v8oNnTu4 zZebI6>&Kg>((L%dxN4Ldk~%i{p1kMXz`@`-rq&LHIM=Qs(wIM7#o1`9_Pflfdy-GQ z+&Q`V;x^uwG~NrozfoSE>uIA`PURob%cJcGS+yjaJmU|+(jR|ug z;FCi3k_h0WGhXgLMcO6Q3+Cn+KVC6@*Ki1`{9zMDPtxu=?ydGc zKVi4W+3uadH7wW6MNIiU#G#uuri}X?d=yp=+KvTP1`Z9phS&H`QRPZ?gw*sJ-0t2{ z7~B72>>Yz^ZL_W6*tTukws&mXw(Vrcwr$&XcI@of)(*ZrPxpDx>8|douU1{_u3A4= z)fjWkImbP&szGxPnC;tyd9OEAJ@=8DME@(L6Dx;qqve6*fD4>@C)dDaJPArwn20m^Z8+s zr>LSXjKlaOjqXi6nV`~in7Xxb8_w`&);(4Xk=z%OgO0f;)VngXM}@QJyNO$JKDvIh z;=NHh^q(KYK?4}=qPXNo3Px!^pR8w|3SWJFGjE(63A<63f%-0bM?@hIe(0>U9&Xh4 z0?#9@;>3DHoSiZqA?N%q>ok5cLij~|kMbBHx5g{+LEy2-kO=o|2a{@#7xDZ+z}J2Q zufrcOn``q4{8uM(T6OD=`rU~D|4S#5|8Enef5eK2(kUtb*I9W_DWj#{|6|*&zu4CA zpKQC1Lhkxp=XyCu<8lmVBd=`n7H14VoJ~7}RGy%W#Cr49(^>ega?p?9(e-ERx<`WF z3(oGgr0(HP_!__4zYa`F{L?#06ZdZb1MSb9S zmeYGNpX)u)rW(q64rEMaae1AlAPC84&v}x9h@)9{1N33vpd?z{iQ{}T8;&1<81#G@ zW}P;uZcqR+VdvOklxGv}Cka2f2bAwFu!t!A6vdq*1{XSIHUOu~n=XOAEjRegwugKP zh!p~qmRn3f3eOHz>5^=WjC`hlEJZI;95rs;JpYZLE5ftA$|@3mOSw+B=5f0E7tYoR z;vx?M^r2K5m)TZMyBgs@q<65^a91UrvhuD*Yn{EJ6jNw#XC5WkJD}c$ z-^Ad#6zwvRnINb-i_}Wt0Gpp!oGRy%06hA^N{6)${{XFuJVU9{rijGhUFRd#rD4+~ zU@$Asa???8)68}mwvJW7UO6dF9u*Zub_X%escZz?b6*A&UtA(6yA((=NPWv`1~v+L z^xmPrKZpueyVXoH#tD=Uu z6#vQ%6};+jPwf8zOY#<-U6-odLItw0ipXN*?gP==`vRN+SvxX1+T`6YO(m{e{>^sN zX=GMHz3B5vX321&zew3uufjQ;n1Q zAL)$vV*TqUswNuZVcZ&?xQa#A@aE0zYp&+S6Hf&gGfWS1Urh*-Bo9v$1%nihv*YuB zyj2tNVmrqJhXb)rYTq)DdAxvwH^)ApEA)<*L`0XwV+3gjhfn) zhuVe0l8!32V_@pH_lXXxTY+n#1C*xKq8sR1UvkPu z*dUhAF`dyK#)gyxLUf;^SdpI(<7a=nB*+Wo?`OX!?*jkgl1TnHp8OwInUeBh~X);9hduGFi;`2?D_VIkm>&m3APYE@+XQWpN-=cJ45)TLcFHve3PxZGbea1~l zxJ0qF>SMTGX+OTxP4K-<*uD+Z{L7pjm3k$8T~$YYm1<-pI$R~6@GH=I{8Vvin##On zq5+k)VK;-RGG7})SDviGbNCcFF{FaRC@xqV>>w~xJ8C_|Yq-4PY{c<*{%mp&^3!>x z1!T1#5MBfI7I|&!_@$iVQT)!T^Ewx zYhfzIm~(bzAiu4f`$xx)t@4dc@oCQ7R3PXgXiT%}n`GOm>}L8CXs&KduE;?oJsJ_Z z?GJf>`jOI~!SwWD0kj@n@aNWlEfZ(n@sRA@Z3jaQ;4R73AG32n+FVxMmh<>CW-^Nl z;%N|M{8&}X`*-5pYsH(#?G*xA)7PM9TK;kkiLhB2u4?xo>1hi>1SNzz8 ze5EoXsdTsOZKM=B9XThZVG`?3wC$bV-8-N;DNliHbUIYQt@ZZVoh;^#VGUC+Afw;; zWF+34!ds~6PQ#E1KpUgc>h5I!FhrTUVJu!ku6P3`Pqp7Vj7P$%Q%~nP#fFyuKhB)fL1u4zw9eVEK%0Ot0ycCM{t>*DRiE z|7nvoLgxBm0W!BYKQEc(>~EyUt_StdwX4j!TiZSm2RT^m$TjDti6&*%Vl3 z(yEDRjE@eqL27UCPCSgeP6q{+B4bEbT488vW(9L>ZKBM+zOw80C$0D@MZGqWeBGf# zjlFub@ipk$YWLOfc#+Z^2gkf_XRw*=ZSKJ4Le!KN%t)LOb1qZOIHSU_BZlvW zyC>4DvELi7YS3rdz>as@jQ6U#xbbM-I1T`O=XadKTXbUoH3{$A?g@l6dU8RICZQl#+#W#eaGFV zW83b1M~g{wMbFV2H_$VC3$aARwtdFZ*6&CF_wjlvk;A%BCww3)|L1y#w3DFAM4!01 z{zbJ_i<_nn{AVgWcMJhP=*33YBS|+9uY!hDlQ{HGit#&pMXuZJDCF?%@J{{?m)Ygr z#}w~XAEJ$f+>$~9T}}_`w^gR096Q6h6_apFv*+Tz^vRP;Eh-20of)rGJdEDnz{#pC zBRJ|A<6mG4ou@S|y18LDQDDLK4u&w0BPABVhzNH84C(r&U=FAfA)2Rqiiy*7fEBq_lbGK$ zVQ|7{75?rkqt9!K6wXR0tFK6tR}m;aw6(Eo-{`)$0tCPN+O~7OwiZ4W#*cF|bM1LO zI=(gixjh-SuHM^uZ~F7=q+6{gX^+D}S7m^@aC~Z9?q43|&#N#(CF4 zD(AF_N~#n+kt(5%t;#f*puJ^9$L#`W9kbDOlHVr3p3oTpWg)+jm)qk=_KqVtP|5~} zOSV!VfAgGk?Ykqf^18Ga_<7|f%AI6YgT#!~&gz(z3_!yfq2`RN(RhZJxDfici)=GV zwzxd>p4%X~jgr&}W=!7)NuUPhrA+)4#TMl{dr_83joe4q zv=>2CN6S%uW4Fb)7UT}^2W#qTcl=NPB`b>rA4Z2L#!}~A~fsow(jnZ zL@k)UMk}Xz0tjid>47hNEt2S{#}X9i-&5TH?DKK(X-3KH`3g)m$9G&}m7j23;_5Jd>P^4brx z2yC5WuNI-rV2PqM&fl8xRa$u~(`=>hzti;bVWyqKqrfD%@EzCNJNr?9xnaXztI%|%~w zxtJen*f_+obxzpSX{AyZH%5;D@N)6+;FaJKfUOrCqCq9okf@9SbIs~=Bqy%d!F=y2 z4Iyk+_Bm4#*W>I?s~v9mC{Yq>Q|$B%DhIB>7!fC=6Xj+!r8EwLFU!V>e0y_op{temI^6dpdYE(vq2t{+(nA2z{PcO z9AS6Cn_fZ^s#CNF+c_q*L6Xawp=oF{G)Q_;qm+@y$_ivhQ`urGh+j{}>nt^;?-m9c z&1dI4gIuqQ)B_-r;c{Zh*XliEmqeGKz;bOYag5epHpju0MK8YcZp?sN$Kq?}@N+Rv zX4GN~;^i|KxksJ@wZd?)@ceIcS<7G_A*MB-XiyT^Y%d*6zgnQ5dp|r7OrGAr1B%A< zebVL~PK+Gerx9GZw;GV39V9f11-(*7%tJVxI>+o;zfBa0 zvak94p0~0ur)fF*GM78*>f1rz@j+0rjL*p{D&Xvol9nfw38I6MlP17R={obOBHSZ- zoGT7pNH{8!!z|1uPAukSs~7P=%wd|7^-B>vrRbeE>KK2v^Jqi;1Ytz5jJD3&sngLZpu~=;i|3viTbEyEmk(KgwhAZt~%vG%B(+$&N)&kalw=fu`D>vvq+^e_$LA}pb%sLHDvFZv*Y)Hp0)IR z?140QllYh|v+*t9<*OxY=q7w#1!v%vW@qjJ$+e?BxPfN1#J-<9w}G{$(P% zPcAk+=Rq1m;y|WOB_0Ky-JeO)?q)sUaMu@}%F@tnL(w=7kg~(@lcR%2vrUw&)=4)O z6U|L#b)A;4Ps~AoCBw! zisF)A+C%x_!!aa}8vpHPL9RX~DZ=TA{f+PZ63l-e0k!2v!`Bd$fsCyO?zw`JHIL~y zuA4&wMo!i6s0_pIPirxExKb{C?tMv^Iji!rOiXZPI6FSz>130>hW7XuT)=HJSS+qX zZB!DnVHIlitvAi6eR1RPQyPTb*XOi*P!V$M^CVgk_t5v*=r*VZMu%d#SGpC zK}Pb4a2iakz=jQ}I9?gKbP^7DS;r^KmAp^emz zF^w5?P0h-+L415$-jmv5la>YR{HTie-;T{5OUfQI%`ArZj$}tqKU+hTn`mt9G3ig2 zr84JxBI=orRna0*olH{$anVhZImaapJVx52(wjoNfz7l@169N-Ku>c_o3!c%tC&>H zO+_1&?`9_qq(UT{XvWc?(pdWBEss(QXojGY6l4;YUJz5I=k&L!y4e6q0x7eOSqk{z{ssF@Xfw3L0Pz@*_pI zBxbA+i0NWsi_sS+cYBoQu=Tt^4?+k26g6tas^RFTlt(oRH@6yV!`Lyjfa2Rx8pK#? zpfUC7oF$+~)u0Hr;vVNE>e-01%ZxZX8cTgd%Uj-*0lhP_@H69ze5=L~oEw=i)!K_D zg%K2L{juB)HVT1rWcUY@asSD9q60=fI}v>qJ$wRqoo3m3B5jC8IXxTPH|pc#U!_2 zihePjgARs0k0fav{hmNB6(k5|x+B0Cs`8*@ywl>1LGW%ml)?j&WLmu$cNN<2cN;Mq0CBPhUx=!9B)7=eb&3S%PM5-#qx1!QeNG$Brty_rXJ<3_KLvhD&|IQ4!i;=&h)(yJx}xAa)+3IG74cM z-jDo{Bn`UHO83v)%Q6BkH`*DQ3TdV-qh8?6&MrKR;&ZaW`~`MSUhJP`-716Q*6fHdv(BQ2ONHiop75EF@FCRlUTL>_&|nac|?)a^Cp z1kPPSg|0~kNTkk(L{eeikcfm0kL4}x5?1_CQRgBc!GN4_7kz;d`0#!n)EKbWoY!Gi zsZm48pfRKJEw*ia=UHZtkD!u`J4+4`3U8Y5(KE7?S2O0(CK4HW)#<#&EkTT|5KSfc zXTTq8{|#`$bDJ%>+Hiv)l2hyG>5Rl?3Wq*GKbXv(h6r+_D8=Z)=KZO2$v;!LOsb z4=rX?Jpase&57S_*%h>zm-aePIbdzWv-0avf%oC6@Htza}#Xaoa~D z=ILI_|BR24H#ef!H?#Pxo$&qK6w&-~*P3&r$I$Qh^2@(x#d&1I?AzyVYZjJD8P;mEEz zfKXLgCnEz|ZGbrDdpmne-NAM_h&Z_XU`HF+S#MZn(bz6kGG_;--Y%KCbF76`_!>0W zrPiL{5LmsR6MH|q8wDp%X5R|h#tUXwT9-Gcz{YNk8Mgf^0j@SzqMAFI_zx{*5u6*5 zWHYrIkso^9M3kUUzSSVA46!wIIJZfF`-Q-LmzHHZI?lA10+{T-Sg|ne+<~(M%-Et%LiP`Qui6cf_ zM<2$^cC6Mbjmpk`Gqz-W2=e%=y1nZ-DcNnYp_l`xwQsuUaThXpN2f~c!ci$0?_K|1 zK20x93_M6cEYsurwVMAU>(q4%wENO&Zk>Y%xr}*zBM9}rv;|Nd*6lJ02-Ic5ME+c{H8B|Kod zbpKNa#Hf4^133RN3@C=TmYYn->+xS#a$@22><}Yg7y!;sUdtxoTYg%80wP@h`WX4G zlh7pE5k~P8eVl3Md-a`E681B>-q-JXN${ngo8BkF8;6@d`DVeK!Rbai*iS|!RpD;U zT|uZCx``pVf(nr?m@Yc7JTLPXbX$V&xB$9(GLd-5x;Q>)$L0hTK>hAiS;f=I>UjGc z+~b~y4StayP&7^SrGzpzl5bS7J_&mHs>HC1GK2-Bt1xgvWfUI*Z2O$Xf${tb+X5ya zXVV>H#2$UG{K}l6|Xk^zmH1#m82Z^JiSChzdlA!5AVZx8LBkUpWDMzJU z&&0K(CTV6ge$kYV0q3)bkUG%lYF5|LO&2Pd3OXHnqHLX5w<)Kf@0cDF3HUWf<0;a3 z94k4f`o7QFy6vC*-UM~eS@HxpQ$DHAY9UeV^uJ0(zhm(&dv!`qDo!&-pf$L9y!R5m zb_^Q{kW^q%RC(H14np&Ft=UiYLw6oMTTpy2vE8A^4OI`Z1~X4j=FK>7MZUe!3<8&RU_(QTKz4j33o`L=a$c1>oXD$8K9W~BLlPesLC5e`U-H)EJS!&l zH#Im5WKKIyy3Qs`?ePR52H?Sik$*IS77^C75JO9f(jcIa4r{Uex=HXH1;Lz$S40$6 zZ}v$FgGxwA{JD)KzC9XJ1SPSqt=CkP#i;>dNXariW>y%yAr&KxQp{}gJQM!4<(Z+Q z!y?;3jBU2~Q?{{wP+ZQ(7#S)H=UwNxGKdd4_?bwLOb`hand%}_c&_QFrHD#O4<5{+ zRq<<%we1JlFM$s>WnUC;<4uuqaaq_H7W!LzSyy(-Xng8lC>}OJmgggH;jCAAN)RYd z&w&JavsT$*Padj3)hYx#MU!^LbO{D57A8r9wBMfH(~}c#-Y&==o{73~Bi9&Pzj;U! z6(%eQVrx%DDs$vQl>2X9LIWhQ>sMQ5aT2yoOP>`_E^AH+@?Ee$2TCp&VFbS*78PL5 z7!<~J(u(#@NBp9(`&uoIlRW+mhyX~0^P;#i9{l{drIevD3!uq@olvFMIO-9`(tFFwJ}-kfdPFG}02OW7b(^%PIpFcu6$darq7{<(#c|8|d{Rp1aSR_T$*3_jl?Syr_0%Q?K(^wI_aWyEe&rAMYq2RO$8+kQw_5 zc`G_ln5I2UW<^0dBxQz!H6Bq8Bcgar7|fWqG*2VQGxghcLSAx?gf$-avi_+Xx-E)zWk@NFb&=8tuDa%%9}j9f&WC+CxnJ8%%&Gw~qKU>Zq#0 zzu#;c6^BI!+1SreLM)Fj@<44!a=?)9;`y<6@s2QTSLN?-^@abbmE{CJjwHn`ds&$%!fy=(HY#{Z9?FY4(9zmw2(jT>2 zCymKkMnkNI^TSS7GD;`&jGxDUlbNSUWlHjmsq z)fm`=f0#zbbwC0DycTn2GiocK&(zdfgSO#g4fZPfbvWXc@RS$_J0;5*xc->bH6`8n z4|A}q7!jIImH#Gm83_)owOlPnoy6 z3lMScpd6#U8?caNMUk5z^mi#(1|ycwa&W<~rOIxGJD2b^Z`VILT}Ad10_Ky;267r2 zzsNyDShA?!{43lLsr??5qoa4*z{8CHipx>KNCqqapdk88LiR%rrXe4oNcwXV3WrQs z9X-&hc+^@)5k=Zko~IE;gGI{fflrdwNxDPbzpnl~RV0=Aak zVlK<&-Aec8aiSH|4jNiq6o2nN<~^xIe^*x64_~fLpoE~2b8aZbDqVL2nIUDsK7LZW zm0}unp2tda_Nd_QXg9|2iQ0xyhIRRIAa?gUyY-T}A@qFOfwHgTJ9jf& z-kFS~DRH%M%ZxP(z65KBb(pPLx0fKxuW5H&=Ly9W1sTi)>eKdcfd+n(m7Wo_88 z!Dk&FhaIkGly}4F<3$6C>y_%pp;0{*X&(TdNUSY^=99Z0358(ghu5b7-KC4sc2EUB z5X_4U@W2qA^?yaoNj*B>P-u=`BKGwcmOs|sPv|;$Z^92>JXk=m#^Ombh4_Wf~xR@nU^_F}h1xz3*5{g@dmEkEEQqo(v8R?Csuz59NA#Y|T;1Ec=z5<6vh zrN`49-x7RbN9SxGxRU1T_L*DEcBSC|qsW;!)wg; zT0_=i*yE$EwkA5II|@^Ej%|?pp1r38b>InIG~Zg(71+seqAgtJB_IBCuH939bh9V@ z?`>R)^dDx2=hB`pHl)SATaRme<{!QAHWlzo^gpx6MYGa*&WVP^ zSY0=vQ(&=Y4=lV>`BXfHV-iemTu@@*DgCvHa@1XHeuh_myX|(e4&H92Sm;XwP~J zO}O`vAj@ljFO!WVc{!nqJ^Z^l|& zkN4furQ(e}MqFES{IEP$J?5Th{f$etF;xdA-ZxX_iQ_IV{7(Yv4WMc%svjRCcHPCl zg^WryE%Rq3 zENsC83%p#~-?O^r?sgwqyW_IJeJX`Gp)XG|$^iyb11Qk(vkRqnujpjY_>&+dc)cyq zeAfwoe{k8u&R6K7YFv^9d~OUOOit!V8T&{@n7^feDenKR#XA31i^2YO_ex0o zXW@VMU4rlbS%$aN&i@(!etjo|4-$$hw8ybZx7<@?)nmzDId-%HF?8d)`nQ=}MSKx(_AhKA% zL;H^2{a`@gjFQZ8lrW2E@sAQ z@n?H9ZQj;C3n5BZtVu%b^N3=R%RY4?L%!BcL>#(14K6BJer%mveHhbsLS0)ZA(?D(sifxdJ z5^8k8F3fgxEWMAHY3A8J5y&A^K2$6S4kO$P#|<7=?vx;lhN!sDcZ`0Us5M=!6N~f? zI13%-35ha}^d6uGCHv|02Dx2?AK_mL%~UJG1DY_%BUaMQ5aahtifKR+hV*@2hN`2N zL?J**_CW;>cfLW1BC$#tP^`}}0NC}Evx1{R86wmPu&|)5$C$8%XCvss_4k0ap2G4Y zNyHZ0!P0r^BBp0bu(Ep8YDnt#oYED(3^#t%J|GA46-g`~+p5KJZ2z$}2I~KJp zb12|?C9)HDi9?IJ4hdugiV{7fr2v#h_#^+sE#N&~WHYqyY=~{^#uXm;Hpj257>FMw zB#wI?UU}KrTk|5}C?6k^(A)c1(U3h~72dqinl-vW6E~C2Qp9Znt|8j4g@N=I5thy8 zR`6n&qu>*?z97XX)=<9@$ZJO+x^!T>)M~}79v0BxYBLk!Lzg;Ex()T1Kl4-1Wu_VCk2{ilny&Bbyrbqlaf9cT~C~Wrznq_-7W@BG5MaG zBbed}d#Jy-Y)2tFDYEK}&ysqyz>=9)HemHY!)Xg5Z>2^s=8bdcv#`BlHS|&W_~3u( z)7%F1?^Q6#me0?ep@z~sOV%HNE<>IrMyXGav`22mu3UD`-3AJ5y(9ZTS7;qBj;Tue z6E|9Ddz5$hFD?AS!AT@z89MEhC<(~Y>utwun>f9{aHY010hQjOFztA--QN4-1^o=` zp?oP4dv&5kHrl+*RF~6vx7Iv;xl!*7j>RRGoViuuWkn4CG!_$49WdSSXF&-1iM9#b zAeMkWvM!i%9P($;Ey+<9N zfjZ!ISB=il(Y!QO-E%0F#m7P^%mE%yC%(p0%b-lCrAGUh?H1SN!{_&m=1oyyrlhm1 zB30`~s?5^D+2?|5mqY%Y{3=)|=)IlGAA0LKpZu9}PnRWZX7Zi95?1NYPcdf#OLB>) za4JJ`EyPc06TiFF>#De@CM&AAN=d|2%Pz?EXcwfXXu0AxxiYt%EqZ36$B7bmN*>Xd zUEdfb%C)Jzbfk2Mc?U&6Qu%XJY0W=jKD}T~I$;TYyl70Gtv5XIH5PW$kQG zJt(H8=k++9oUHH48k;Qd%4(0~cTsWjW`!(V8_M67WyeZe?!1N4LWaBtV&7Dx`*Q^T zwQS4na_GG&rInp@S`vsMZsM^QtJjI3ki?UmqHKG&;SnppYQ9lVuG7xes8;Yk^gVG zGx^WuPWeBVyU*`Z_t*Km>(uo5E_Ya8y#n9HZ~Zi$u?)#qQoM{oPpuZ?R_YE84Tmx( zP#flYwC-$*?|SF=>?I-9A|Yup=hX3tfSycx=Rur{pNq~3u2*=yrh`LzdT$1<7xi`W z=a;k(nca^9_d`*` zbvQ48tBpwx@ zOFq_Zl1JwQbvy`9^4__}8FlKug3nJAO3t0{eX}`4Egc_Y8o5Z&*GK>_>hl8Pw|>VF*5=o8X#MF5Ey!0MwTFITG%L@mi_AkeS$irBLN91LG| zCxiusnkNLJ-F)F3={n#|f5#OV6@HG14d!p);=yH|YTKLzlkEk*vAyMcRjD$zn-Hc} zzI0-_Lzgn--Bc~Jkl}V3j_?se z5r@uD3@0#;pVPVld?s-M@&e?e9MB4yEu!hSf~3TKl~_cXYsQWXs-2&t({vBO@(rUu zG^Y^z)1BiDK)bI%*{Hi=P+F%PT)Kp|4bo>F<%kj@8n~{df8X7 z`Cy+gIbjIqDX~#~*(BzYyD83Y>ioPw0~f)L@d=jjNkHmq)p(_bO083B0w8c zkcoptCuxT@!zeerUic*A-n@4RqH92Xdwc-k&Nwe;Uss3CXt5dZw%Wq_F{LiZa4j6e zJ_Gp)m8D5G%@U-P82Q=(OgGxi7(1>nxSZYh(*8A@p!M@5}92@Ib~om9GPw&&8(L|4HB>MQ+r6MlgNRlW#HlP1Vm|FGYHf_y5Z z8YCsA#A~P=4i#ToXO#E*p(G=JzrmkXYDvf09~|sZ1}-Y3!Qki|QSi0?Sbs%~H!K_) zZ|wZ4WhGUOa}9gqTtXL~pE2;_NPs&jUe!8kDS3fRWuqL=1$*GIZ`r#-mCn^|)Sggi zv>2^)U5QhD7BHzbK2ob=xFipJw*_S#E5)fRF(YxJTPwmH9RDB1x3=1smoExR{S$g+%^RA!{n0X}mP9Sq4a8|?4VrD-YhBMXiGXZ` z%*uz4l%RqLLxevTxu5RJU1=}0wPN}Amz9hATY{{UNtX(U$4PBOeFB%%gHznq9ZCuj56AN~xr{^?5RFiq>CV7o8l{AB4)-Wm}(j$(=@~O;K@A~7Bd&k?v17OtN`I;@{ro8yyc#}1QtID z-6oJBI;;ag92|y=>FIzSFe4`=wBd$LtfdFJNPBCL3jniAF`GKtA&8PwH#g*bHje46 z`{7YXt}q7g*^@`#9UAxTuQhIEsM~TD_RXZDrcZ0C{P<~ivSq7QekNao{npiE1sm)~ z;D0G$C;Gh=gQDSWnfNs*dyTMS998eL^~?BKY8bwwU(a4?aAs@( z8v+JJ9~O)~G+Z4ZNEIPqs{kOjdLI^sJv3GwAb8atU@HI+c6c8ai9Iw~9UxRyK2U2P z5O!N17K1%BQym~|l>zYg8_t<01Xx@W&|KjWa0U56O?g3Sj6(28HVOj*;PZ2Wnsb6M z&Iqu$MWDDt|NnoGac;xn+C+11BjeJB#kGaz(gwz@359C~{r--fUCbS;Jm^n=BvNR; z5MaU>hol6Nrc{sYSA0-}gNH8k5aq z>azysFaH!51(vUpYzAmngYf)Om%0<8@61mNW_iIV;BDlVuYjDmZcW2txl+XD1r^g_ zr8jQqVtx8xkArET>yIG6Br1P15|zgw5@K zVbJnyE@4uT0zH*lutj_^|K#VUWN#1B!+N zUkFDQm9@IGZren+*vg}1^35$s7*0i{q^{QF`|S`?j*7kMNAYA$rVnD~^ai%J*)%&0 z_PLqt`FQSJ5&?h$Wfn1(`SLM z5ZsuNV`dAeA+{3osIjW=fI-Q}Lk30K)pymss}a@j)y!|}U|54v%NIBbKzdX=rqve! z7P8Bm8_l(7CMPKdsX|n@q|}P!k_{VHfWYvDpy^f=TAYJIKlD>y-6Il?PH8)lckDJKn&PPUOIuP^lYbyfRJn8DKk`ks za$NM2-!Vgmww_*0V^68^#>I7bfIp>9iOZ1r=``|{qHRLSq=95Z1(ximeA>x#l_(g# zQ<~yj<>chK!!mf5Xng$ zCKmVA)!q?MRorXSly^yQu(DC%q?;W&=V@U z+22!-i&Q`1i+T8d-n(G6BL9iRKHtAELs&H~ExgWwrMjH9W9Gtv0l#Ok4@;<9$n;Q>rx! z+BVLXK_lk~F139)+!=*=`O(&0jDF)Tk~Gl&W=6oqxvQf&^y57f9(?QTFFRS4I z-+NvAFK$A&e*^SC+=P-=ivK0$oA?{ff1STW0{()u*gqh>3ZD;H02RMfb1*kng7)?9M5$7(D0+oJvbG~HOE&b! zjN(l<3`oAHJh((6;TF|eag+dH08v4MRbEe&jC3BsK`1Tcl^+9%A`w{{h6x|aYu^Mn zuo?mmO_7y7J3iIRLWPWS2e~Tb*q7GHlV?>Ux_L+4;f?e_UFk}D%?eRuMT5-G{AZ0l zl#~f5F-%n6%DDlcujbD4-eX|*XBl~pNrDqU2>AxA@;^t_J-6KA=$sbRhm_y$tYg&f zLP&@W9OHV7Eo^DjDeIdJkR%8cQiXgrJguJ zitI$xMzG55i8R2tEDNk4`Zm?ftu0dwMOgW&FjAH(&Y#+O z+N&U}uA2Ltk?20=oSVnKm-?32B_O>|g89!cQ1DmybW=9e%U+3Y1VB}=7}pK*d* zdm#awKPGc5;Q-;sQe{*hY0u9%((u$y<|iELEca}Pogs7SOjV>bKB7~`){uN>LVMdN zV|Jf+s3>Y|ZEMI6(_9yuJH28H4JL~7`y9@<`S;l18QAfEifwuD<<+7skoN}dBk`#* zn9Q9|PG(AS0?oMBi1sz_8?!)^h_mH+!S_-KofpKMidT*ywe*>p1V^Xu)>86gv}JR? zRVnby*9-hq9iCT?(N7t0gzV)YK9RMAP%wAoZ_>iKH(lXU-RI>ju!s z^6?SkqTai3Yr18;w?xi$q3)7S;g4$?Xq3Fm5T}oE#a-WL%$)HMg1fYhh5m)Ja`e1G z`MPC;cV_sBM*Q>5J-$=V|8kAF=BN7p>XfFpo3JDP1B|;T(k)TvM;Q~_h1?GU-JefW zrEN6*cN4pUS@)DzsJ145ieM9xH^A}u0lx6LZ84%-Y#`MRbeNrG!AMjKL$};I8Mmkv0 zSA&U4$dvkzNwv-S95%$KMV3x$a4qA%bSMFti13?8$C?EO zJ?76@%^KTr5~>>>4;?+`>zX<#D0O8jcbmPTn3yrG zR~k^36Jer0iGq};86cV7n9y_AzQXVksoGG9#^`3F9GD3H9!`fa)=9@LK#-%gn+NF* zR!(Fv#~7s`ahCrOY|%piLnpymFt=XL!c<|1A2{D5fo`8&A>pr7fvPRo%Xpx|^zj=b zz|79hhFef|XrOA%Ems5@VewpcbJorD+Pp}SOziJsau!J3Iv^Xmlu8}=12Y2R>FMam zCZW{_d5MWMnxLct4WdDeDbF9oPQqT?9dqJE9YtyFKcn5SnVMuP95II}3k;qZ*lNLEI2>MsePN#1+e#&D(i-)ZIl0R@>nydQ|E%ei+aNG*o6E>G9atECaiz64`xthl- zwtqlO{z}K_x$C`8z{I7=or8)M4?(5YY zVj@aMHRz9sdxU`_DFZZZ-sa#xqThc6@4V)9!DIbNw$XVyDl2|3}w1|JS{?%_fa)v$1X4ww=bdZ8mOfr?H(hZfvhKww*L*b?@_@^X~n5o*(Y@ zAAD!c%r)0sU0ZYz&1I@Yh+hFYI03nrx3S%Pp3t@X%O&;xKBxVnFMT$(zSLfxXC)X0 z`qEtW;lU}hg`uST-E%H12My&lp?hk2g(X-Zu*c3-Y3P&T@4pJ&w}JvUEHkOy{1FV~ zWMQTP4WW@>-2G2Zp!P{NZPgz5?^MwjM#Lg@>MyVX!})<@g%mk zPNfvJ1^=L?ttA(**N!^qYsvG}AK)M1sxCopkItWgOT!38x<6?*!s$2{t3VpiHPreR z8>t1v=tHrXMDvP$>jS$)PKfifA@KN1b!}lF-Ir!DCp8(tu4_W+ln&=)|fa{%Qm#j&H zk5zBh+i9!@qpF>$9)k9Q$*N`HNNn*b=Br3>d*1!)dq2;5cAmyC?#>~YwlQzn6*uos z&mmU2m#`UXd~-=0vXH|ff5s3&W&)}bN5YMC9VIVYTxyOs5XwsPsIjT)Hx-YmSq%`r zN#zs7oCoAO`DE~%Y4c{ijWZjH*!3G>fGxVnKG*)wU68P`O`foRVw@cmZd`#)f;fp) zFMFJP`YrS=6G_r!VBOU zC`dmQ0=CzW`)g6r5mg@dvJsvyRfyy#iJ0aEu%tQ?yyv9&W=FhpGp6=*ara@DY~mif zPUbN#pH7NBrw$<;^BujhL2|)^GPMbkH0@7pmNF*uc`YgKaQj2vc|{veXHElP{1H~W zmL(@2t~fVxSFmxtIz7dS^>WW5`xRh$O-8qUc)LBVHqdzXXQ(3-OfAnW5qE|jU!Lyw zqy?W$I~xL;B!6#K8A9x8d=QFx^;&;e++eO0=;(Q5d~)hfWi$^~=a1aJd0qm{z`unI#KH zxP%kE(si<9KTs`+37r`MGs|W9gmk$SPCb1xPEPNKeuj16)hc%Zhv57kk+iy1=s1wG z$WvDIVyF<7bxv>D*6`aCo$VZ}BX}b;sJPS0Av<)PtNyXaiBR-O*n;|ggE5H$;i0E< z*RH{wF*k5_BG7E`(ZttppcRv5zC-3JI@-mE^I)|IYw##8>2g@i2fjy#$}GFdvsNli z0wEB`pgF2gv$8*CcWG^fG5B47WNIv49Tp3Xq2^&nImNG=DyufjotcA;&txvI-tW!01?9n8Ve<%Flg}*8~d~p5& z^BEKZyt({W8l>91L~3=$dcgmhgQ)lO^7Lnpndq?&j zhgshodcU>vZug;eW!mi8Z?)Ybd;p@7zYBdlB{y;N(ZlAzQZ@%ya4LPH21=*3UiAdjuFFfx}4O?d$Y_#tKUJr>`F}oCOZVQ@9uEe1Wg>p~C zw{j{hlOd5lXRnohsscZ=odNu{N5~^;1ze3<8YYEU4Kj2VoIbuRS(O}zXQzLWs z0-k-iK#OXPJNgv#4MEEmYWzxM6Y`IM%D8?2%VJ809FXMBIx-zWIG_)5SgXh@bx;kQ zyLtMpgwqhDrA)ANMWm*wjT^Ts5=Gf-bVt*5 zFZkDxL+24HTzT&?0s34N8VPug5`@(yIAX2pj(H@%gNsj5)g?$gDWhSD++m~7hO{GRdoLUbqQxEM~QQl_qIhhP6ghm|kWIN!cTLHuTD3q=` zz}TVZqL$Hfjs|ANy=r{Uq^QAZSQ~QMdLXQTimwt5TT%LWqTbI2--$PV$2KYNvfsne zxo2aw>Xb)o_G@J}-L2&NOZM|ilKAZhOW6i){B)Nt!$qf~PKa%$5iuHr@|5$T`RtYy zm$0~3&~OoouspBXMG52Y20JL&8qyG1gdThIj?Z68Kf!9tEI%BmX*D5n3oUxWr()`n zw13V~%BxhlSQdVN??l%^7u$&8M;(zn0yX<7S(4cSVGiJmioKBW64s~FP+Q2_U+i`b z5k_`jJT>*q4fW6K`&Jgc_+zqGMgG|bkdB6vxq&_5)$^vTiZDZg@;KnZ;7!PF@`LBw z8c%qUWvwK;YtzSdLhqxy2d{EuG-PDZHf>|zHh8@aM3Hv!@bDyH7mtt2JqE|0ST}Xf zACX@DdgpaZoDSEt0qPq+OE}50$GjO;;G13n{%0-^CD?;6z95@C2;p_N8M?2U!H?Je zSX&-U^Hvu*_LyN?zngb_VzM{|m;t&u8Z>sXyOXy@4Sq>CX)L<5e1 zblsEz7XuQZ2D@C#*;ZGc8B=6jB!%gs_1Z-nD%w-a4P~1{MULFjHJJ}DlrKuoyrmba z=rBV!gY$<)7TtFBN}~d}1(;u#skawPMRhJZHr@<=br^a%@YdwPe+Y&frzaRkAm!e5 zX7$6@jv4sf1i4+4e?sRwa5_o;h}}qm4G+gOD3My*F*twY-1oihbcCqeT!h(45)m=+ zP1Bstw4afyz$|7jIPyw{B9gveU)(sIUroN{4^UtS5hierjKih!KG7F-Q5JiPGRX?{ zndzeH@&2UaP*a$($LX-}0s4omYa=t5Spku}1pa>`xtZ5L`#1ki=-K}lpM0Z++4|4> zqte~f|F=(`s;n(_lU7Kb*t1bMuUqmrkXBK26sTY@v zr{{sA?-RViu4qvcK>}jlGg4Q}#uB!{w$0nV(gEJnc=Z8Nof_`6neaYc^0-BYR!G~ZI(%aBP_+qULm##72x}fSrtinkVedF>l}_xp zyieAH0i+vym3`-35EL;NF_r~0wstHT^Te7XaM`G^bHD%FzdNb1j&m%b9o9pPBA>$sc>use5IQ7 z)!#rv*bgw_0Lsm|(xc3Fn9YlpIq@)uNhs5!YU4&M0Su)h)R8cJ zeO=^h85pBvS^TIk~`Z}hOdYy6VRq~QtH<(ok6k{;K(F)_5 z7jIQT&oNDc|J=#TY3(XteTrQPM5+E>;CK`#Cf=&&9h{xrOd`6@vM%{KyWgV!B{5Vt z^g15A^@6}N>HMN=nA}U`0DHaFNC&f8Vdva6E$p;TE4eW?iMlH)$cBY9DsP|+K52J; zK3RJ0n>5@01MPixxJ!rTaHsdq9lw3J^z4^9OSbO(DSKyv6Ubwgr;L8?pj>={9ECYV zM+wo;uiM-eI>gYDn{(v`eZ2NcG(@B?0^`3>IP?Ww7^!O@qh_qd{lm2Td2wpVdL`{V z`y&@L09a){ zGOP@q4-3t7!bY*!*hfO7%QIgqarej5#?;|qh zq}}l^#_v69Ju38IerY}VTrXQ=a^lN~*q5H@`|1Q#;4l^zY-{eaVzZ2Dg8a(6Vj}Mh zDMf~XTC*7N7k5C4}D_xC!CqO+N~^MCW=_<>^l z&-?S0$Ep9lh;NV&?AP@#2d9!aHzvYm6VD;npLerI=7bLa0cLq@%UUXxTAb7p^znKj zC7rg5s2fN&*L|IP$enA@73%R+!?Gk8@bHWLV}O^jd*JG671Odl(B@b(B3+C6t7+8Q zpjt|5k~hqJhjsFfeh@=z!_C~05!bw1l!m7(amuH8HJ7JmiJas`IsgYJgg_b|Rkf#M zU0o?SXljKuc^mRjsE)$#J_^r?K}QW1yktw!#;76FCf zvk1o2XN^6pEBpCeIoH;mJ3@K@ym1AEHy zeo(qsjpLdw!LLiZsJu~LE#DMyH!2^xP3nKYUY8xOLMDB}D%22|e7Jdz=<|uK@JqDpP{D1QCgGdi?oi8WX)^%$vR6k~@@{ z+~e(WKYv?#Anf(%jFd3f(e@VNZJV?kJvI9Y7z7zsbG-VTgr{5jC_%{6UzcVIFLk_x5CtY(9iE8@X zoqkmCdFxm-X?oo?-Td(6TdP{FzNj;>-tabw-yZF>o)n zbN$R3=!I_T$tL~mW>unMLaEI5Rfq`e(J<5&QB z-Wg?H(?yK2oWZ=AQIAAcY-8g5i=^WF27>i5e%tg%tabXH>Xe1IKK;(v<-U=M9S=s~8s+$9Au`T$j6f1D zm2Rew++#k+Wr2J`Ip1|QZ9@7^Ie!viIi3~=+#z))wP@WwI$!fQX8v#>U0$%p(hUlP zYU)I)5P)T6>zV67K;i03mX;Cid3_nvPAe4*u7^?mj}0h;m|Z6%7j=Nj^a-5^+Yt@g zEI~*$IBxDBy}tTz5sk9KMpJvLVroOiT`w|kH@a9EV%zUbxvNDAHOJGop9DcF;WXhx zhblIWV{529!?Jh+J(O~Jx0#WZt{a7?O`x~|fuDH~>Y#zgqbaIBFFLlQG;1F%bF@T9gwXE696c+en;4&cq7}8IkrE+@wF$NCA&weqbX_im7@~(y7 zj6c0Nmb$7pAg|seWYpM5_l`F*olS|D>;k;zN1NC)T?zG;SfBQ-rXKK;{H+Ox5z2x& zp)p1Qe&+m05L^)5Xa1Df_^J_R@h#f$5}4I;JLglj-?( zR)4=erQHkZo$GTha+;$Ys{{WaWNQ1+kaffY4t!GqSf*`8mZO9EEBcnXqnv_A=Vpik zaC5(Yiq&_Zf*t}@_bYfg;tnq^Ohs2Lz=t}9nhrRBsN2vCZz{+>ifurDF9xWo6!Pdv zt2od*c~ zbD><@ckdW?+OV2DrI!H8@aeaY{uCSxGQlExZhJSvVX~Voq=QyP#nNSO`!)BfIBvfk z+{(82FDNpc>+(C-dDt0|IY-)jkKKOw8L4ufs|F$0Ara(SzLrP)yU7&50{qo|ueq`^ z;;@H?9;8`qx7*8?*4KnrN%-dvc89Hpcy*T(Ox8O~DIFRTiCbQy*~|?VTng{a8mRt~ zB7KuxRRO`Zv1EShxvdyt;JFFHM6dcKK&V>iw@Bsb{5jCGhpr1aixWFIFw3x~!(w z9Pms^OmnNh#J=K74>=*cU-Bn45Ol&Yfu&VQ-dt~W^1cAuE(U#SVtM(v{11Asrhvmz z_<)_^j$}c(D9G<-pLHfuL!*>i&(h#7xO6gb7C}O?*>xPbBOWcE^(0DB?Tp*pn`l9> zDrakouU7)jH z9@p`wMXp9d3mRjo-f!50a2W^h3FjWXe5Vk!SdO->>K4{sFq>(n80Us6!gP#)KoMdI z_G!D|hRodyfQDyNeN+5ini){BLob(vTS~To*uj5qti9=l75p>q$=0sLR8MStT6q0Y z5>nEAl@6Vmmb@+ld!3^jYIZ*-b_N`+@Nu@*A|9HIfZii=y0Gb{poN+_KwCRQ2GO-i zL256=@l~yQo{3&9s)|o_F^3*?M@kQa3HPZl`)`e_IoeQ_CqCKrgq2DZJ0 zTKZ=Tc>gfL#a{HAtmw(*L}ek4DZFt*iPb0SB+D;aV7R}dAtZj~S;qS{`-Pn?(D#)s zbtLMR8Kv;^1@k>@>%Zn51utkypMh3pteY$Lt4XGsire!lJ6Vm;Z19|6S^dD{)E0YCSkVe@%RjS2eqMf+kJ@$fj%P z8NFTF*l~6-R4#7kU=vD~kMTsZRY@I)fS-zT<-^ZKoXfqM;~B)C;^tN+VWUS4Jb~X* z1(n)ohTSfrlhOJV=QRk+nEoZ?6o^c&FXm%oZd2eJQXPY_`{YGb--_A%G!T3&i)(!X7PbzoOe@(kV0icp6t2cQReGdiZ z7M*(IgWwxyk*MJt>jPp9!VlfKd{J_Zou6p<3?mK`zW(q4nblhybAxT5JZK&nZ5J!^ zAA-CH=`OD;pU!=|Jkh29tv(|w!W;GL$_O>eqK2+(+s6IR3W<8MBcJ8m)y6L2UBS!< zoH~gEUe*tDFu`N*Ul)H3Ngc5i8VZ53?B_pZ+1G!`vcDS#fuFsUgYADFld1y0dbR)i zTNt}#t2tT?+3D9Un4$=9t^nhde} z>Rr2hF3#JF^7u+PZ%Ub$1pV*!r~{7TG<%YEuD6G)bkyz?u+&deX4zW4(3DoH7)VR9 znjVYl0$b9OC~EY_nv3>oDo!ezZrKJ>=P)!t$Bu}f<`?8ojqP~uzBNn{Ve87sTAx<5 zA4)5kX11Wliur#Q87;LTMvE{dPLyltERIw2LrKs>+&Kx5I`D1OhN_ET zF`FuD?Rtx)&#!)Zs)RJ6Z}xDcm1c{SM~4ePaw#$MbY+v{o(fdO-YaUliX4YoX>@56 z2_MxysAti5SsE^km1=Ens?1NN<4b~YCkM`7jtZGdy7)I|ng{TpX0R>_OlrXJ(`B6_ zYcvSG`)xJ5yDnp~DxG)sFSkfa6|in(wi!6J>%`u>UgAYB;)VEQR+d=uYf_GW^#~nI ztk>(rk0ei0lzrU_pT-=fW*n=ibzaBDsH}!@RjNXC==gF@{YwZ(lwpq0^=tG4q!V;! zd``AT5@@y6Azag|PUSO+Ye5YwD$g(GAMoeT?I}-@lHfXZR}>kYRhT@WleoKJc^P!; z)fs$A)viCKLJ2Hq_Hk{BTn;(29qO47aeUAKJRRc>Pf_rthC+ylLLlk7?IPsWbZSz4 zaJ=a~gK1gc#|rP^?jG!TIjQ`kJdg4)sdZ?3=Ok~%xxBlPZw<^qFRTngu(?&9NrTx2 zqBGergVJ3RpLoga9;Bk{HCrqK~tU$Z&qhTa|bmRLlZA#p{z0 zo5-E|qce4tHEz@TUDn8Yol_1OoAlZuMBc-p85b-c3g|2LVdbHd$EJoJ4m4JxhQZNy zRB*garIjso=RWh2$h>!xNvj@g8pg+vg^Pl6nKph79K42)kB3{Hhj;c8r5c21Q-s-S zrRJw*-!3m8Xf3E%SmGhhH4P<-x^x4uFYQ4cWP^p>5LO1rS)GuA&MTqe+O}gs6JS`E zyQ*8(A4QfXf4Tosb~;qwdAfoZAMyOGOjm|AHo;c3n?3?HGBSM|L&C64-^=Qq^*Bo) z%KR&b%b`Bw5O#sF-a`i_h^_XB5k7~5PBe`UgL$5P2Zy!_fS9r#UKFKeNm(*L zwT*e|$qaWX9Le8L-7R2LDL(rQV3A^%joDj`DT>V{A`n-1wdMzad6@eJ=RuXGmv!4nx*-ur=dCHbDq28JmyiE z%7uF0*DHftn;I#GL##IdW5-mac9ZO#eQ5U7eC$>Wdz0C8UX2I_?7BV@AV}%y`s1b) zH_tZ>G2HlWAb7uFKO_VXZ;ca*YC3ZwrUwU=M#ymR-LH#@4~a_*FD*-sIUt>y&6=1p zmy=uE$Efgc9CD}_i+|-HDTO1R1u18i_Hl<&zQmF7T3#Wm{jw4gPVel$wE46W>rPya z9-u*~jFmJQ@Q0%T%$}gsfhPI>KTPt%e}VPif)NPT{{^ZYfiV5&{lh>b1{!F-<$R;T z=QZmr#iETImJMAvZgYhhAwgfKI4q*oq{r(`k9sfTQ_ji2TKwwdHYWx<4+*28YIg`2 zXieX@szR3ELOcw-!;HBHHjWB??BJRlOI?8XpV;7Q+OZy0q#0+2vOnqDo9~Yiz+cL1 zFA@?w;fPRorOGm)Iik#f3$;U3{|4n6cJSu^zL$5}ZVjM^Pv(v-frK(mknM#mfU^`5 zH!TZ_swYkfmXSL3tkRJss;3g^6Px1|F|X8tVPH)mB|;f)r*`yPRD#qTDv+h?=sy(G zrmiRxdLOHpGHC=FWu|!}-pNR^#IY@l5jZ;F3tQh!Xvg><#LdS?N5Ai2bjJt@Y~TsT zn2%(8p>@F9kO(82^uLyx6~z^`&IHW6>QIQE zTco(?C?dd=*QqUzWF9qO673{E%W`pLMjZoBI6@=MC#_6J_8XE)kylggz=rdRH=40> zNmj3Uqsp?<$S(Z+h|$R}w?d`}6gCPIxh(neLN;b5s_mm@;=|yMk8#*gmTHL&VN?C9 zqAYki;}#j_pq(d!8~yo={6=HTiA7s?&-|QkRVGbEaNV!Yl7nANn%~hI|P5DcrSsy^tBI#|# zf~}s0Y1!yxFS_OKOP&42Q7%8k;Ib~cC6IEe@u@V4LU-n|j-o(^t#{_DTRFmdxo=DK z%0OkR^VC@~Cvp_93ac{@y&-3|CJnAh0W-J~J=3zCxx`Vi+$mCo`&MpVRJu?h@0&pW zO>j>W{%)&G0(^osrspenI7$7{o`_1c00pwH&Fy304~bs z2GT7g>0G4;BI=RtuQIOo==#pgcZCS|QHoR4r2)%9mw}ogooz6w_wc7CPz5z!Qqaw4 zB#gS{(I8YY^2;^SYNEM|xhM#?(8b(hEuGAwVISpBg(+?c}Ku!NC@iPWc4)Xq^ zz3SV)pyvPa!vA`o$rK1QX8!{=8a6;hi2g?r?lAmOg!r@x81yG3+IVc69V8jan4
@cTsBR-MwauG^`&L6og!5Am|>!GhJi zBQp@DfyS{DtQ$Y<$BmY0^r<=+?ZT=We4<{=s=uzGF!h8d z0}`(N!ly}<>8ZKYe8aBH<{N7(Clj-JW}l3Zm9_kj4CKdhmbgB;lyR-mPa9DZx;8ik z=K6&P+;mK>VhPyLDXP3N_yX1ZhIOpITDVB_7wtC+Boh`&vi4RvoHc6daMK+~MBmCu zSu+vV$wJT@7-tP(=5cTATQ6Q3Z|%5!l35098KBto|>E#tm$x+ zQ|BqN1=6b3*nS;gyw4l^yg0NwUU!rkqBTOGbwQI+*EKs*o3yvv6L`eicR8e(vR^Mb z8Yp69j}PqsSkDw{9(K4&e@VyjD&N~_ z0z(v#&6ditVYQ|qsfxn@#)tmq(9?AP{Ti2joG&vzBMkx#*n`A-+F93k6b{=@^=-hx z7=TvdJ$Cf^C_uJ)dkEFNkQHBF{iQVkRTH#qSP6e@Qn?1JjYR7vEx7Gsuzc<=xI=JC z^pjna+)fu#8MrqSHs56l%2n)5kao6nQo9hDLr|^|w&Tq=+s4_d+zBzWzgC*8}D10xQDdu8#JV{`1 zk~>-dyk9C?F23$cVd;QO?kk%9JB1v9N?K@ujAb*wbx9rBD_{>jeugR{R=K<&h9?Bak{d3$t0&s4Rq-Ai}usGQttQTMk2k;1MFzp0?i7 z>eLuTO8?gWL?D3KON&=Wz_-EF_p91vMW9q8)Eb>8SnPW~5&Gw&-M9_^ zsCDkA$?B4WHJr##8xuw<4opu9`JG)F@s4Hc0RbqmshG9=uGFgJ0?26J!Ydf^MqDHJ z@en6Hxxg=w#8YMN?Yoo1)?Y17TBCT4b0ikaCSNlA!=+mFE31zc*sWYD$Qy3_D9Rp0 zaU{I*=8N2BUVmY2|Kth_=O9F6UGwpwh2AT+?!b}G+cMvWr3@w{+7j&fBlT8w% z1YnqT6TY#83O9i<0RqgrR#6QQKA(nJMR7Xuj2VO`Z%oX-c3JdFDil{{7da z0M9FbfdfeVVE#e;RHdE&QDOe~by6Vl`}fb}V&IFKwsy5r546XHqv4S(=-Do`4_E zct5RY%&81stH%}W)9sS9CUJtlx|OnS1;z$8KZdHd6y=8(o`+C}WT%uXH+Vxenca0pxMhxlYea;}kC4fk{d?}YYy%Gje$Mz)F4xTw^jBrub4v<0=Xb|A>o_UWLh|Y6DkC41B*4J9lm{SaLx)q+v}t zNY8v?h7|12doeWLVZ*w0ADT*a2UA9@gdig6Q>1ySpuv-8wyX&YDT{S6%z^_Tq^X>3TA3l2 zJOW`xHH~DS{Ob^yghi8I^bScfc*c-hnPqQW>bKD&M_QA6jr0KTa`Na<-m_^R}~o`e}kx&Bv=x34Ts3qrL7^Yr)bvxRcb>7{<#7kVFsLs9+=|_ zPoN&+_b1C6+KtoGJNXsG(o)OGDgBuFW6X}+>em=$U-KdFmFLVW+@&MR8Pk@P1G^Jc zg@&tC>b;LvBsG0-*qk8m-G#^f%UMRqCEyNuuK%~7tNjD%$_D|4AVaL)73)~otp5S5 z-Djr&Y?=mp_mhOl)5Kd()PAHQXagciCc;^OC!23LC(OyEkSf$yn&b*2)Aa>DKz`R2 z=nXIa0N8Jr;kI{gtR8R6-anv00tJ6U{TVvH*+uJCTFH{^3E7vS6U>@jH-wimqw z;MWoL@Ytz_IgatZEtgP^;>%L~%j(OvS?(K^RXb&57)V{WX%sJCg*c5HoqKidvMz$! z2=fR95_)@y1o=GY_e!khGU5#~<8kMo_JB(yes>Vq{3!GHw<1465AH`3%DHm51&d9R zzhvYQmV&XzFOaE74l*^@+zFhOzyEq)qv;tbUG`npf2%iJeNlJo1_UdeA{1^?pY@dX z>i+bx<2$qt2(feFzYqo?vE@Ufn-XU6<(4F^I8qSdXzYdsl&9mg6YAxuvJ`1LO7s#g zZ0v`y>!Z*tLQKZ43YL8T;a9|E>lG^41x z&*czltfS`N>zTj5afp;N=ttNO_6ly1=?|OnnEUYiudT00F75RtAbVx~$K9v0f3erU zg|aGxi1>f@G<|@)_0Ri*m43ja2`+H5YCra2Y#jhCbx6wcQKAe}Lz9Ix zu?d3r;I7!|18H)-&D-b$MaIz_+V11-e(=r0fGYrkvyCC{zKrs)_6dv=sFQcD5oS? zQntf^;15ZnDhz9TGbdSw^}|z zyX)4t)ljkOlP$>dR`gFvYMXI+4i?dLq5aGk*xlDMmeXL@@tF=8q;}DbJ zP3B@&=M6UC8{!)f9*{?sf7bwa<1 z)~G^zf6#|%v@UnrQr3^uQDG?tD6v7bPJR8M_MmJ|Mu z_e+#LakWOkwt0?R8q-6TE$A)~pbF(p9R3WuFINKqyEYU+nCt9!UsgcDE6Mxq8y_cN z+v;Q3hDc9YAX{x9ub`G6~;4Q!OJWZI7#WJtuXVC`3*$}OB#jv#{E3s$}lagyln@RxXjz;l=BVWMleMgwIMIc}m zpsGZpi}D;r?EwZA?bE}s^Bss69#Pag!D}urVUb|;%;MOxP4m&B4d{AxW(^^`U}HZR z#eUs|devW7&*7*N$A50-;LFJs8q7V?Znt+s&EK`pQXB^HHHkW^W`{>^HMAMq>u`u@ z1_R*^S$gv4-buybljM3ryn>*^S-a_da#<@RWm*3lK!7vj*HhqFMEW1{_v~K)^0)j| zW%%nDytA7p90({7bU;A<`cYRy0D*b~!pEQYH{JUGW05pp;8-Mcp8?P0hiK!HR->CX zGC4Km2oZKv3%6{sgi6c2(1)K#F&(eN<|k|(evikiEzj}ac%CE6sdEqaRqvd4As!kl z!MxXcRl4r9u*R2C6s!egw0$&ELs*D;VL4T6gnCz%MmUV^O^FQ+G@;qTN>R6LciG&W z-!L)Sc>y6`Xu;q$=!3RCGl}&H%lFUVaDE$q?zYY6_%Ysh6=2?NyAewfTXNQi-K% zXpOucL6vq~?!ZVBmPLJ`gbnP_k(7?{>)Td$P(Z}v6nGM-T4g1i#8>01_I$EBX!j}R z{a}lb`M2zV01(xJIDRFb!w^XCN9gbmG!)fk|O zI%0AV!%q=U@1CpA`i=t+$x%P%ve3I|h+X|QLhrR6$-#XGQ^zjZ8$T``BFk)1z!OByer z#>XyZT*w(MJ0p@WO<8}3yb{J{z%a%ZDEL|C2t^f>WX{G@?nZu~K^sz*;2g9+6a%G+ z>QfXqiX$;bW>6%n&eEugJIGBNYaQbma?N-l$y^g!8bz3YXyWu6U4Hj&u3uNC$9;Ye zY;*hB1{B*1Oe(jvEqE7fG`PDrZR1S5?vXeB4%@0e--5?NhkFl=GYsJ4?@`meY%CZ| zI*}|GLRiw8ybtrEgo;xx#@bkGRdF*r79;(@_cTK&lmix5584`tFHm zn3Yl>g1IpHqgv<#0^pEb+wk9eHQ`yGS>7r3X{EH2PkLnmUYlx@PT62B3tU`fnbf9JQ; zNH1joW3zN>cWnFM0=NgUR!dh+?3iyJYr5J6jjB1*8|^@L5|piE)$ny@QSi+*Lc1@(SxIQO_059jV;&6wq0L4 zp*_w#UuH`OF%m*uh(6XgiI3bt1{EPt?s$?uKXt5RLRrndgA1lj^!d?RMGjZB_XU-) z>>k|>NH4#E6E+9%V)=KCg;TU!`@ID~3%kc`JCalb$GAKr2glWj9v00-@`PboZ;B5G ze>L4|_UI8B1Mz469~b1R{{>wC!+ew!|1DmkX@{| z2-!aoTTi=U7l}bF^pYfRlw}v!Z>c5e5>SJ>v`dI$r{^np#`f~UOYIADq$#D z1HhhUsMQx|Uu?U1WOJ$pa-^fvUo2ITx!`+O z@kEyS<_52B^`-*Tylb9-7R3;jAZyD8ys1tjJ1Q?M3wgDr`!&oRpyENN6!jMq!ab)N zM|j_ffIHgeFpnB%j9T(o9Oz4(qUU*PF8sOlOY>>M7ZY(R-DEbS8g!tSDl(dlb?6kk z7(MM*d}Mly9kxwJuQ$Es)v-2$%^a!dLfjnBHY>s&wP~gHSR>fN}0)tpq3YOUL=#NjuoxKm5owbG5E^pFhZhT2FZ<+5e+D zYXLMOzWAXnkxV*X4HcMsrS+sGupZ;^r5HYKUs(~b!(cPtbWlARtE1t@- z^osbyk7R)SsD~zWz34A~B)^$*RZAFte4n>&cQY!1YG{~{r!!(jz1k5I@mP>Y3TzrR zv~RwIB>y7D?Uw$u1%jua9lnXibt$~W{c?!7sr*SYApc-}W*lRC z1o*d7*Ckm-M(B2$Jqhr2E@%G9sp?gyMf25IcsC(=EeE^BbVkZ) zHbqkr`|_y31yUkzXe^Dqr}IV&>hC;D%IPQ$Cu}!wLvy^*Ff5_TVKU!aZdl%xqQM@N zva4K$mt5l?i+c@`rCGv(P?kWQX*#;>)F(&5MEf;ZYEh{{5x2>{wdX*Yai{wam%RRv<0ENN=Jo1wIeii zHlU*!B>U-W2+hXO>ly}Hf1Z7Mpl1!HzjaTpR1ozhF&vmX2Dq>DDR9$c1x?|)ur8>e&Rj-%j z3un6#jG;RKivq5GrxWIwU4l27coC@5`tB&V$ANzQpisY!a_}bT)>|XI;zyGkOz2sj zjvo`y>WGNp-y7OaPfna#4x4&2qhO6l;uUC6yQ@T#ENtR$I_BON9n1=*f)`J?!Wkz8 z^V4>{f}6ztl5*_+zUjMg7SzGS1kXYw|KSpht&t(4r8`}#u%f*vg!d48w?)Z1F~Gci z$+3VXgx<@rFgJF`aHr_^0A04-N(Vd6LTX5}7(=q!#4>2HQMh!Xqxv{G=R)Yu34ZGz z=ZlXSal&&*-vJJ0`mccODz~zpDqM)NI+dm$h&cXxRjOxxrDpM*yScgBeAK`57S&JJ~q zh*%I&CtSi$RXr)`G=Z^L0c~w6<7G?YlBHeUX$B7o-3`Aam|`t}Zw58ltkzfNpGnjy zQE3$BO2e)#)nx=1sV#a?P!+4jAU75Rd9gKZQL)th9CW6o87qolf zL4$Q8e`IZgEg#v%_Qx5%I(33C`YeEtwEpCNKx%cK5#S5mp|}4`OC%13>{Rx{^^WHI zCrpWC48yG-MdFOz_FC*uBuadp{r82tACj>Ny(3)lc|tc4ESti^_(d?HPY)g&VeT){2itu(0##~g}nvn+G!eG6U} z8o$A}6O$c5vJ;P065JKHKtvLeXzGj26TCG~e5_Q<{Vp}EgaC4B9!Q&y)B~T)PNEKq zzCNte{e!ofxwJ%#m%1~>jJ{=y&jtk^vnjfm6E>{GYn$BQP#L%2#qVu$cF|;p&qtAj zKJ+%YON4LoSPB`YNOiUN#LwYJaP}p~CkW=EMVsDL1j2fVH}EajCEz`(1dVykCrt`i z7!(Vw5U0{rV3mD_~ zg}S`%nH#RDwd>gp@VqRho%z6@XrMu62pxBWF%>pcnFKH%A9T1o2C08$p;K-*`d0LR z_<9SVI+Lvn6bbI`?(QBSxCVE3cXxujySrl4E+RAt;&I7t{LkgM z7ah7q^9wl1!2H(n)zzeQmG8}7G;Kh}nxs{Tfdwl(>crCckgb)0+fR@xX%>ne?Tu7T zP86l>Yaet4pvh8j*|_4UX-XTFevTZ5rG6J|g$Ndl8}H$+G>8crS=a#Eh!{gd7~KfY z?U$@VZk{VVfHiNVgTc^FG74rduEvOd>TlO85@TJ#Z)g@Sw0sJ=kRg1XnU|dFCk7=r zNefm?!l}P>JR`*ph_TD-f`%D#WI7QRb;PfWKhBT~^MNH)qUy!#A&~mUG#fv!i3SeR z2ZuDmo!^+?@|4Meo6}wDt{-&iIj`b8Tu$aEWY}pQ9%L#hY`%8U5DIjlI924NRNvge z5s4V(0Vi(Yzt60zp1A;qNH3c_?VdH<7_d_RDQ$r~MUoN4n&-!`0|`J2d>oCo-Di6 z@g!2-6W$*VnG?J`R@03m6T&Y4Ay8vD>NVPAs#O20LyktMOY zPk!m<%D=+*@o=Elv9^m%yeT1rR_QoRg2_vID^X!hW>!c6b|X0X@YIex%!o@+f`~id z338%Pq)1&<*{BRM$qVR-PAbk`$LWO2Kbmucl(F!!V7VBwCb7M!(1_lFILjg3qy*Ka zNC;gXRi%lxIqxTZcJS~+yZH{rH+$wzFXg8e^JFvD&aqR(-5(>mG+*@PzP#aJUz%>+ zDK|}5GWZgaA9@n$XzmsyXxn;&4Qa-T;N;j&+w|97(NOaRi_$;fP6kcu{DcBT3JMmV zUI;Ik9?XZt=Jx|f24{tCis=M{U|4rJhD{L{RP0j81NyQ8FlUVT@n9ctPw`KGwlwiC z9k8t96lOKOd#&ntjqcQZ(($`Y55#b3h~tCvH_+9}!Sv_3n{*9q z?;P||GY2GE%C^f=^JRW&;`O|&?-bRHV<(}C_awP6zXmbC^YltEyc_(SF-F7D{-pp+ zY@Hvw0ss5Z?G6l%FmxkkGM^{EFv*iz_k9)FmrTJxomrfZnnH|R*zS>f6DI3)U9qHB z=CuB`+Xd!A+Pao+e^qmXjZ#cE1K1Gcw_<{&KiKf^c%>q}qS9Xuw116PE(6%`*Z9+c z21r&5q!K+PrZHd@Szl0VH5QO7g8Ah@JF>D*2HZqnW&lUOA&f58*>7m2(i^Le_iJ%u zS2S)hCmPh(KL@U8o(VQ^Y$TmV=O|4cA`Rg=B>8F~bR6owTAuV2>5*YVQN2go4SgoU zar_B(5^t=Q#~AyqoW3RXxtZ%&`fBBgtA}Dh1sn5_H@p!ZvH}Fnn~E6Y3oT9iXGH;$ zW+H+a3tA?Y)Hb;&a#Uz#0wZ<)VTDy%RU#?pSYxH@3|4T-m`RbzgYqfXkrFQdaIAsF z35;`=wA})ma&c697|9i6VR#cVq6A|_sH`vKTIE0d;$xi^ziAI^?KR`K?(=e}$xFuA zK=>dKXhR+%AbhOCn3N01^rN~<@?`eVehVNPYo7TbT#kzd_FXbRRPYRHXA;7yiI*iZ zWySiO`^i3Lv)^!qsIBQdp+(Qc9>< za@+NR_}u_|1Kp|m#$3xW_>)_RXQ1X?r~5nC*j_AcV1JK82I6AjiIER{ z*^!>-?L{=G8d7oH5Dd))B$`Ys?<)bNzCL`RD83W5}}6w zX95~D8o9^orX!DM#e{yN_>q){TQ@f!Wdyswj$$2Tf|H7sh6)$R!Ed81QWhr3KdFp( zLE+@G)u@>qB-BSsu}WG95*%c@?XmXjf;C8*XS0mNL#5wqsxx_iiV`&{O3oUhaVP$4 zJ3A>`WEc;AayDrZt=s#+=)E%x{PG8_-y0R(gv{>iZolARob#qH-$M*X zp}Vsvs@WFVU19O(tscd*Y%usbI#-XF z>gUF*tuah)bO`^GPLTrDlKfp9?QP^yH|8T*-pkuuDV`+@T~~8mh_mEOjuJne^q)5I znWi9y;v>kIj-KICSOMwh{kfh^Bw@QHqXH*ZhOmvw#vG2vk&`1jS|G#iSEKo({u*E4o4K}C9ESnnOqAO1ZU^_~{MOW?DMK^lA`xn~{JM5xT$)$0} zPp#ct{pOQ`mO3@*=(hfODA2iWzPMEH9qbqUK?$k2KQG;t|u3hRv<~5&k zni<&br-mc!nU*;*&)j~H-dA!4Y8Rc`+!_Oy)b<`oHH%Dit}#r(01pg8b3kdSo6E!48CXM~41A8c=6mSRBy zN5vMb@sEt>(WxKx2`g9+P(=x7RQ znjqEot{^xZE$o(qQH4`53`|B9@95P-YH$%jOr(=gFPLNrbT8M@=_cr%;!>_%v+W)L zcM_?K?#gfUvGKm)YAE@ZV9lZ66ucjkwuQn@Cb?G_x!v^Uz9Y%lQ6OkZW#D!J#-TQx zx%Hd_B^UxSaWz=5lzB2reLUo;hH5;@dY-6aI2WC3IMHVVAxk!ZxCsaJUkZ)o_-;e{cSg>qgjC12%A70YxUEOG5 z@0%YeaV!>n0&N1dD=+MGry)+4$&%J7Oj?^Eny^ZeX(DDo3Aew{gLsM>ytM7mC$ogl z$?mZ%jFPXB5R$F0JAo-Oj*reeC%t(x2dw<`Ma z7b~E)s8?@MD~9ly4IQTHRDw3QUc6xo)uaa$KG`b9MCzHcCF))sjJq}R^v8$dSMX5u zg_fQ$f&-gTO(?l9O^4NbsWViW*}eG(P4qBbXf{=sbxfy95xr+>(^3_cbs;XEEB>Hv z?5A1~%FyOHc}Tk=ju2RO+O!!4p3%%F{sCC`3`iX9fpy=76&`Ap?({H*9!J`1bT*o? z&ZCx%PEH%VJx>y2?ER&;DD3j(NwC?QGX|||u(Yvxnk5*0O|V3NfVVvj_JoHydfrEi zoSb?t6)sI5*l+TEiHeQ6%I8mg`FNQ1kRoCGGc;NZ{zCo`zwrz5eTagQ6fEvV#@}vI zmkVg*13-G8>Tfc3KQ?NCNaG2e=P>#CCi!d-|5kzlyt+ zhHg1&F;Fi$$Oq+>Q19t~9lBz_tG0E8+lzg_pLm0`uZ}Ah4L{3Ddo}s7I=Q>+dI_wn zQ8u>SbH(!4iN%F@24_GvJqQMB1OKNij?Qbmh)n}#Fl6n4!(vkn&rbua+6 z)Nw$wugES~Hk{W|FSjXnMTi=qEKQRa;!^`yph8@Zz%Up`XRS)3_BgHEvE&v=?0%Bi zMfwMB=prX*0mG=4Jg1nG^+|Q;L*}0fpXc%>!?y{u)u4MHPRF4< z1w*`$>(DruADhP>#F=MGe;n`znd5(G3@vEgNNzPJs+Xu|&LY7AO;-XdM?Gf)-+~~v zz2*K~*LLc-84&kn{3*ao&0zWh;m zQ~ju)A4cN-(u`gJL_W@s4CPC8Os#zE?*Zx}vqQ6P;% za8T?^mqoL|JFtuU9;KAc&4nhJ^^zk7LE9Zkiw8fPDdqDBlOB+I5-HWPx)~^D=b&gj zMe`a^p}cN_Ye*Y{r=OZo^QRvlIc3^eznWJpG9=~&(^R!I#*cE}2A(tl&z8hCgyW05 zAhb9T-s0{Y($q^@M*Ha=xA-&~UX5`HkC5a!~-%ur4{%Ufhv#LG%{{6`( zB9!5r(>rS@K$o$54x-!4x1U!l^AkSa>uul@rC-6O{L-q@RZ@p3n&OPQjocz`%+A@8 zss2flvn>O`>i|UjjrNo}h@mY~53`IOs;!Qr7-^15>m zr&cKuIkI7qimkEI_x3xNE>u(??~Pugk|2D_JsIDhD333@ZJYEnE`$?1db&@xfv0Fc zoKPesk3~mQM%gwXoWe!Y*JdnI?uYPzL4cd`%iM6jfJYXs6~_eeB?@D z_>lVzqwTq6$B7Y3OQ)Cmj<5GKp1O~R2y2iE%qsZ0AS44F=NZ-jy9Hl5=SA;%`44zK zrbsVaP;Db&-Z52*f&tkAw;ha#2i<3nMFQ_T;&1X3Zv?-NsZU}-g$RISD)w(a%Jn~J z=Ku9ks{eazcn3f*yZ?~PudKNL#D)Pk^7au8t6VSzR-w9iwYG*q#vvSkP6GOwVH1)# zY6&iTxc7T}2?-SyQ*f(@Ay3b2&*>u=H?bH%M$5zDP0({M4~><;n|&iwvSdAR5LU7> z(H?WDk&yx&ptsYF|2;MVEMaPgpFQ}878NqxJwa$Gxw3NlsWiWlo>1El5lY7OS?9Uz zRh}hp;_7xif`~KmI)>AfyYzliQT?$yPo$&RScb9i+P)Vk8;dey;toZl%cJEh_S*0jyNjaXIi3-R`n)iam%<00KQl z@e#KNS`0*0Ek^UI^k*CnypLVm*JnC0uEDD~k`5fJ5Ce}6IqPU=%#7M_Z!x!5j+NJZ zkix#@vv5Xc(`O(}ckq7KqB2wt$H1r>5l0pbm%U}6IbqD9wxUfTP`;8sPfAb;whkdI zO0G_P=z^SV^S5v=l+JYN$92%ZbqTI(2Cr6?(Wktf!$A0>M)(`U8-&b*IWlT#@2tw; z;$!CX*dY&z(UX*9E;2h)*|~fHUBV{-Hglc!b?4MS4phWsp63_Pp)4ZVP=<*kM|;n*@G2GmZ>|7VINuzRs$}M5ggk)RgHvJ39*4bcYbX2T2BL$J_X870dJ~ zzk~bNnON)g>FO(Yuli^BUqs_b%O!{cAe#B#!oGk460Kj^asT=G57GQDVWV>Z$^06B zMSYbwe-VujLM)(8^`lZR>b%Ti$OnljE4BseETsN%f1+Vfx-9Dj;<9+9)49v{P9BBi zb#m)3-*>`Z&f6}0E-%sA`;4tCH#d2_2Hr0xq;&P&@FbK~Ar}QxM0>=iB#)`qB>4j@ zD{16&1Ew`>fF_2ux4~;?s#|r3&OB#9E$j_f?v1m=qn4X9(h;WZUFlP%An)e*@ z$wG{C6Z+tI@hGo8PP6kJHR6*OmrIL> zT2M1I$SWuT7bH$-a@h!~5h0M~l;$>?&<=i!*Fiq!^_i}Cvs_5elEKDP3L2aqN~ORs zx(|feT@Z*(E%+L&CLd^%Q13}y^k@;gsZ+DK1(!>5*E`g^I=zeoI7bH4WAhhCdzlr3 z(>t3F-xBoWR1MVFS@Xd_n6fafr&ez7$)F7lxYMD z%B+mQIp2FX6hAh4)5=gjCswEC#nrZ=DuZ_sz&YfSf0+hpa1=GY)MQ;#XrK}}>2eT% zqM#TuM~~hpC^xO}6|aJ(&njS*#}|w8?k}mvqQWa!nSoh(>8#z>qL2V5aA+o-+SR!r z9oOgvF3JeZmQ<(^riO^$$ma^WpUGplB6+exV+t%`=K_-w(G3s=B<}8c%Sl?m%djb5 zkZ4dy3D5==XZkQpHshJ&67SEn%K!N44wVTi9e@?q{U(56tV2AJ<=q2m`}1Z&<`p z13&wN3(ojv!J2c0=;r1WZ%?HYNI|Maq>*NY^?}{=4vEyGoc{D%wB}{*B9o9&CE)Fo z;qoKZi5zAQx_0ahajW=A>a9Ntc3~C(MI;G-^-#Y$?R~a}4x7}lVSGtXFvEW1j3Ip( zbNl{_nF8%<4ut{Cbn+WB-T%Q%|9I-0?d)tFoc{e3gaS}pL;Xi}?Y|%q>4R5<%qVO$B zuD*5%#k=ntF(#|;vIGLtPi&WlI%phw~fSvmWRoo;Pck4dRd2qSFpR z7X=|<>qw&H{wj2B*q8JeP-924FqsBI0kJTK^+kjnx!sUO1U=C-bl}K=t(^}diSAzY3lq77YtRfID@V*%tv``q>s8( z#*Asjgb;LQx57Z*+$0!P^UDf{^~QcC)p-_bNsKnL^5M|KWGmW3vB}m#pn^Xz6HANI z*?~5wSnB98+2PqSMeXU6qQXh;_-ZBvEjRO7eCUZ9=xu@a`q4toR-yV4q6lVAodbGA z{KHrVT1ShO(K*iwCd6*~T{l^sq$9c6M=~fEWk2OF>q~2MAA6h94bve4?iz^bFKt`2 ztwN}1h4U&U^jpnVW7qmy$?eSL6;M8oN~1x}jA!W;Eg9K~G;+E;0QI@f@xdU74;z+R zM|psXmX@e-ScVp0^RX5A!>F5;-x?ZnSw!jT6~7USW>%>!m(_!qfB@aXT+&>HO>Ke+dyS|=W@iGS|f*7~# z?z8d%F{+;*Cb(>H(-zvAhk1wz7=#*PoLQ|e;eYCGZ*9(w(G-W)`X;(0@TA;MqN#>S z`z<9R&pr&>7+;LhR0CbQ@bY?^^ZJC@SI``wQL6<$fS0Ef>GK{h!2FSy7pu@uRc*fF z%r-IV3yeYg!EO7r<=SbrywqL|8&v<9NVWHxO(B)n$d>;n+2|PAGJhaW_J!%DJ=Qaj zJd0S=(v7}QoWzkMO79Zmyg<0vF`DEyq}X``m@1WE9RiGxHp5kW{TzUDSaoKJ3Wow} z6OSsUP-#pwp7z`qf~JI_WEmA*l#7$~Q?x#-jYfvl2V9rU_tu~~lJ5xt{EKGr3Y-sU zMbFrlZ-<=+G{s4aS_MeB2;h7c$N3<6@vFJ49=1KA*&Nz&!$W?w|nrn>0#s_8^QuE_dB>Gw#QddH{N1h z5x(6o^t8n^!bIl5c82JewA{RgZrFWDCO(Wn;D&gzl)%gy9L1OZfZh3W=f1>ew+d1F zVQY%a{n8isvHQ5Lc=glW_A3XG|100kyo-i~{#=3!^OMy@z(B|&!aXZTnjG0=nmBm@ zaAr>5_-V2z+Jub4qu|k?a|k7o+DyYcpeqePcWn>vfD6D#YrioP?H?kLaDS)B{mn@K ztErq5fRBESUk4^@fd2I^JEAv2?5{qNG{E7pdvp;-U(W~6TE|<1l5#1J(SHeGrxPp3 zJJ1Kedz*@@-+?p5s$oy;XQuUC9YH)qPF8(=16<5N-+N5|lEochtrCopgY<~?fD&2! z#zGqz6sK_YWn!PE6;nHprpgLXyo5Q}YB>cR1OEZ6evs=?oM`2c#Y*EHq}tQz5HA%B zlG~Ptj5B=`Z$HT0oO(^VN&UNOI-5pt@3I!{qzyd!0;?VH1}qZS+9D*ncF9L^?egd; zzZ#OZPHqpx9!MDTEC|c&pW*}g7gk10{-SO)IolFX*gqb+HniS&*1xe_&CJWiUzSx* z(}#SewB&=3m}9+TB?(I<3yy4@;;O_HrYR3Evv+mXwueI!kpc?x5u)9Kdu)yzNKVba ziyO<FmME~WSpHKD#Y z9K85)NGC*^Is_|TF`%_qe~I_o3!{SrOc%rzN-JA(Hjq3hf>KpKN%@p=kVi|3Kn}Q` z;Q2x&8QO@;2CF8SBq^MyRy<~5wUNY2As?o4XIC#ehM9cIYY}bWe3BfN7Z=WfK=lBM z#X;o6R*Bg|LjMLJ5tuDxZ|C@!IZ84sA^Lcn!@pEdSas2TN7ucN(2;Q=maK(ku?!I?~(!M2o54*W_Q6XdV&S&+) zFQohgC2rWAp{3;cjQ5KNh)=OussI&7n!gFLG5!DX!2h~9`2aZJ*Z9Q)fV0V89vYp$ z1%3iY1T$Uw0NML>1DbWtI*1gI!Q7yjN3@PVA|w^u%kthkNvKxC6Rd%8Gn!l2g0fg|rMEq9fmrrHCzj%7=IBod(3l#z`Mrk` zG_6tZhPOv$9~&H1gH>S}SF|!UMBGS7VINZ!eGQlBYd>l-m7VfW|6^iS4WjK|eeS$; zJOW+WPi8W+8oTM{L^rRHM=N#H^qMG7%U_hky#{#m5Wga&1O}FNiR@l?eQIOynkunh z55Z_bAgy^QWL#56a0u6(ckmcZN*?{BB?WISbL2iXTAUh?H0J@b^Mm%=g*kNgZdd5+ znZS2YH*#ug$b!JZr)}V%WHYclXE>PlUzfU&2zhKB}$`6~uFtH7}_=BQWvR+U9kEf__koQqg&3(N` zh(8|3tY&Ll(QDJ%b7c8sMd?V%CNS3rSPOt1)sSlEX;}ggYmky3O^h)!wTQ-br4N*21Q^rp}*Km>c_4Ng?o?sK7JpObQB-sk2mp*(fWO99I z))8aXO<0)%{>(=weW}So9w5t=DK{1H`YKdT2#%|M08z9vSBGp)Yn}>wd~YWSBWBJ( z@}M(f02TAelwN%si`04xJ8L@mTa2U(t0VD?0t4*ohxpy!GA2Iq}piP!s4c04kK|Z^nCbi)+{1!5D#U8!lpKaiM&fBO!q& zLUj7sgHhZ-h2C2sYi0;84l>U*zI{Qc-4xM=Sxy;%!|idSD`6F24V|P=%?H*O zgv*01P+!ZSQ)&f!psJc%-J=bBj`tg%#-#;)kP(iPSa|!Z)zb6MTd@e> zoI?1U^O*e)e)>C=Ux{8(Lg?S`O`!l@`ZfM?>inY<`Kz*M%c~TYQt;Gi0N_1dpHu5_ z?g4m@8w&{ll|_zm;(mphrGD?8F=7%v<}=-2k37B4TX?1^YPNX!l2@7m$$hGC1RFd@ z4tu-GI_e?dHuWeiK|$<{Nfv3lM{{9hE>C5YBblf@{J+#DxXYA7^CP}FnDta4W+H%j z08`GORR+ZwWEWtFi{wplyT#g&DKm--fk&gfni~;GuNMWR#aGKl15#qOlI_`a^g4*h zfs+MP)4rH}2(lC9bD*Ar?m2{@94@#;r8aagzb==n^nkbQR*pvzP!ktA#EEHBGJSBO z;~)xQ=nx|7GT`^?qAWrNyCR150EJ(`-ZzMD`K%$D72C4Uz(FICRjPFS2=-MATPr+y zU5?SH7bNIq+th)T`&r7ck@bF%E?^leul?Sf2ltUw6Qwfh-B%rEYzU%#{#d-5mN=4Q-0Lzj-ed@bHrfv7QkqbgFfJ?%6$n-rIX?s`^0 zv&^+3{kpdE{!1!9Brv?}EbW_J@~tIqA(GC-xn|fr!f12a8_fqwI_fjy51UdVlzBGA z9JjL>k5o774x*YPd=sQhROUX1R|0gk%qW2hl0%UfYF z(vIDZ->w%S^>AVJF53rSH?jn=Z{M^F&IYEQ@si##aojKXOK9|2rT7ZQ$?l%=E(Pp( z2nOxmxPupM4IUo-C-~W(^rsLEAZu7W*vcJM#*0Ss(^+q8yIdhYaH@jlTw8p2GwJQSaZFlh51JghO#i+bh}+Ah5s24gHG|6X^E*GI z*ZW-@l+AIoNdfNsOjZ6eDI^+9h!?r|i?w8u$YMfj;wwF`rAdOO?M+Y-A1}ucfenF5 zJmKqsZR+UgmOamdAZQmG{@avWv+dX;?0q!K08zi|2kFies;Fnd2QevOig&mCjyEUu?&T!ODH_m(w{ zr5heSvsXwTV?1y$&CdFCIfi^95!m)x6(!`Z*`!U6{Wy-aNAV*lvQ$|hz0_Cesd%nq z3iQyjw`vMGu zo91zT-s1Z7K-NA`qGj&sX7@7l+NU{M^~ty0$Dk~TrqkSCBRzhR1dD9V>OzQ(I1T-2H^ZC6iue3}IOD4AFoz@ze=eQc5oTbPc$U;_ zG|AK}ip)khUs1O`mptgzg;bbigWwr|eRcZgOi+=b1LnKHAp1}lJr&5WvrHoAlJS9F ztSf>cO5l15rQ_CXQdF~!YxG}D%h}Q7txV`k(A}Uv~mYy zB?I~%iOGDeo_CS`l?7YuK(V_pv<$z)8^`4pyPP)=D&ePQDp=$eQtAhfT|)SEu9F)U zHf98NqkUFj8@NTJugRUQ&~Ro_7YNARZcL@Y{7&T_oYCtrhn=$>@sxqptX2C6J_DT^ zcwr%i6&@5dlP>!B1Y0Aok zmJ_S@051S%|3fFCh#<zyy{U?Lbx*m1UO^Q7sK$>u%uyx97ul{qL`#!-xunDFGJ^I#7g2v)<7 z#!3WII%X*5zWRERk}dZd19o@Z!9MTx>--mF39ME}q2^r1#KEp-C$R&ENJUrbFX&(R zk959cc7$Zow^lVH4N_PnxeD`@ITr}l)WUwwe=#ExZ7Bvl7A1oN2&K(?XUFb1oHa$D zhwoVRI#I-QuCGP=_TzmcDID!%V`cq8qt!?h72R1ig*#w(!y|d`T=h8P9fK}S6Z7IH zYWLB?z2Y0nm`oc}X14s-C#Fo@sTS4IHi3;WA}^Uk6k#C>s5WG8xL7AS4y_+O=cGrO z@gg(MF*1|D^(oxYuSlh@s@r|04#QJ2=QN96eLcJ~VNHbe3f@WD)S2R~I% z&e^y7Kx0}7O@pM@ONIAkfEPj_fC$WBUkJ>-)kI~M9NZ>&G&9ozL8e})p2kw-@p%eV zs^-@%;-rOnE`T~O8fvrsc-^6D*&3%OB(R!Cjj0Y@JJ3_Q(mu8zUxOiNhIhT0;-^^d{{5d9t0xA#cSsI|GMopLw&jj-1dZi^ZJPV!6ttv$OG6!QC$o` zDF2!uUjQJJU*i{{G^l#o&Wa`E(=;X z{VWdneCVdDib%j%b`x$dd>9S8`aiqM8ohefc27jo49n6QN zKuym2w@5rxh{7q7$hT%p6FW+U8=MW)ba~pvnoFkV2ue35;=XXwuUKrb6lz6C(Se&@ zQu~~16o-g8kL6nwT#~x{cgi3GX(F1sIc-x^UI7!9Q-zE7on zt}2&6*!n)5yW7R@OCqMzpK_+I41P|yzXu)njj9G_(rb0j_1?Ae4y%1BkaX#RxyXVf zFp9sFv4&)L@W*`)1hRl zNUzgn@JGZ(l_a3c1|NQa5N@>4p1GyyRZW)ELx%cpS@})?4>oi1u zP*Hij>M8L`eLjtRt$bU$J4<8KdG`*%(QW2*N$9lm@S%QzVV7Lo7(yN)-_eTl4Sd^N;z1N7YqfOtC6Z-egQ9&=zpB6p?o`i>p&rXs!^pOpN zunh1Lh+<0w_H*!gyH5j?e)JAa-ZkgXD&tk6JS*S)?N=L9ENNFgtfe-O=LR~Qd>eyT zCzIbQf!mk0m&V^xG_(&F(PxyzU=-tcBs>SZYP%(3=qbx+~P*Ao?(v+u$Q3f`3LIE?Vh*_o+MAHzTVx ziBgmp93L6kfkhZ3@5t(~88z53^NLbY=OqZ~W~h|fe|+e53u9;}NI6*ZzJh7T3YgVM zP4RAT#|vUHOa0MB4RljEQlx8uf8S6rholwMypLSc$)hBuoSN7X3 zN%QM{;fy|w+1>i^Id?HHi%QL0dCUii{nISeqQK7vgf~^2+BMj)2;aX#7AE&z$cErR zK*mV_-K$gekIn1vUY)A{qf^WvV9Of%XWN=rf40TpKz`%>^cEuEAfkG1Q4o@33Gyt* zFzfSW&;tCHAnma-PFWv(pn{tQPzQ?=JnV~y`h(H+N%>x1h9s4%%|x&)6yjQ<1TiR;E=iPTNws6f9J)E(75$ zV5~ZgccLy+D24Fzv+ADN?iLo$CCwI?NCO`nSBWSRPbXvbufS4vs+e%2CB>Fv4T_s$ z@r+kC_JTG;$oN^sylqH`PAkeI+B}T;X^_CNNO6QMGL#A4*pWUNmpN`fOmRjqLD(-9 zXc#7$kysVoOn{W6Ah7&96JgmzQQHgF<;SAgD0Wk%5sdrq}i(`6>tKJ>tyhK6-Ql7iVh@Y%f73b2xGaZ41Y{t#4F@V6_Qk zUFfAE^WuD782DlGHkz~=I~4{WOf74hxcRkvU3omfnruT3TM@gfGUF^N2wIZ2qlGFL zst;@WB+JUNj$UxB)tvKoKT`6TzP9$RG3Nt1)=q}Ip<$M}>Mo9_0O17I$KuaL!;o2p zjNqAVU}8v%)6rDrtbKKHlqP)%JdBp8*v;&cIDPSLt$9{K5bi%Es!f}3OT4JJB4e16 zx0S20lH%XMZTrRYw~R!Ki^QP`v5Tp=X~RXHI3VJYBorvOvJeoU%wmlObP~^qVf)49 zTe^X&U?3ohUCk1n{gT(9P{@;Ppt*`?NVVfv_r?n`)yuE)%gODw@ilj; zy|PXkZ4?iilm{8j6OtygmPGpp36RHxsgEW9b0j53+|vZEsFa zKzL*D7^kJ`IqiCDzrly@8O8@*<}b%s86hMm(vZ19@^kmFsU#J-!-IgaQP4k)bPsamKLJp_K1 z%t+@p#vBGTYm0s)E^~ut`0C@RZ)DaPhwK`ZG+DY72v)6OQYJcab@P^*mxCp-MP~{_ zjfR6#kE~6zC0Qt}Tq>Vv5DPuIhIp2-c0ZsDq>f+Y+W1_#bP?MJ2DEB zdx4tk9RjXpD1zQH2%khoTHHvN_}X9bQ0i^DgJQ|(dPYY?;^Zf>ZZWd$I^a`aXTMU{ zw%IFzva>bag|5U4y*u&BURGuep@6{|~r*CkM=Xqt4`sw!0tO}&>S&Q-KNO+>~Y^8;8cv zh3>ov;c^%yS?9BeY`tziEAV1fiWy!qaF*_JxOA)g+B%ZoSfdIox_`O=C0>VIWhugW z=$FW^7-&Na&^+;3iQv62_I25~Pt5b;uRfI>7t`6Y3WPHiC`(F$M2KgxM%nmwkMYvyACP>t*{uItIH`OdTn&;HjR zb-12F7dSOeKd89rJJ^mXQ21=RQl;D zKEpSYX%%%PSh%l5OQeA-)T@`VXeJdg+CD%GhVWt)0@>x=VXxQ41DY02TlSBPtxPu=S#Y;o4=VlsdWhZ37tm*= zPp>_HPYLl9G%pBp)zFkXniPpwy+!`;;}8dOMRrQgb%N&-@{GdIU4={V`V7QH`I>~d z+Rga^-hT^^+6^^mK9kr?L}|zJ8no?e?sneTyW|IWB(eR(BoMhcU^Wqg7FVg4ui6X;#w*RKi;|3@_u3<&rXgVEwYIFLY)Ky5BlTAZGr zZ@U3E=1+kC1M^=vjeej0!VxyMa?=0TmL~wfnKGu|0LKA(~HjaQRTjegHHY0NU?w)|&1gG&#k8pKRkFaJbG>T4+Oy^;Up#uPVSof&cB{i7@{S zC-1Cp<794S{O_3Gi+^7&?f?@}8(*&n053!q{m<76$N4wr|5_{@6~Ci_0+Jz*DzDto z02YD*EQI~dLQ4LB(EgWZ1K<&g{ehSVm@`Gb-1Q{j5q|nV-?kLt{~-Q*Su_B!v*dRZ zlK_Akmm^sR06;N-iNCLyE{XpD{;w%l{DB7o=yK_vi|(rdz|;OeE9On+AH07nhyhsV z^vC++5>Jw|bmiQ91pxv&0=$ynt*ci28%@c<-1^_iVB#fP=fBzSgPOl=ou7N!Hvsbq z0m$I@Z!f6o-+0PG|BeR5wy*u)Xi$K;_HkL!VFBh^2B4SU(O`7{M*G*)62M#o{+O!* zV6iZDlL@l`Y%9V0lSn-bf1!yfh=>6K!vDUO*Z){6+5kW{EC|yKz=PvO0RjU5ZS{d$ z{sI&i|97mPe_$B{u&`;hc{l*CRGr~ZtpBI4GX{w|48!;xSS^-)x<8W?((V3onzdolQQ7Dv!WjLnYg^-Hq)L;rQb zx{e|Sb-{D6x=!zI=D->T%evsvtB4iM@&oO5f;J0UI#V60ftG_CEG_jWqInL@BxrZA zv_5}LFI2JmALE>{w3GQdE!&q3d*C3kGaVxU$hBq)MbK){u4psf+ewjc=&l~=wp0^H<61=L?dBx0O=Lgclg1mrHkXK<8_EZ%) zyEoljjbUM#Pw*)7sXS`WsZ^9Tb$31Hg_P((qY9nUC@-Z_Cw6rAH)5K5PEc%0c~2ma zqEZ0%DBJ!N^E|l}jF56EN~DB<_6E219(^Iv5flK8VggEIKXGl?YwhjcifDf1!_MN$ zW1C-XxoJnY|1IKqkq?{xDvfPAwXL0veCD=eniu)7VW;BQ3s3t(*};wNL^$u9U{^e) zv6qy#U9qm6`5x0Qh*H>9N{MXOqJ{W&8yNl3AiBRkHf~WM>-V+f)_S-0vjOx>W;MU^ zSg5O&b#?-qf5$v0N@3Zq(pZhB)i9Pe{U@S%TORB3l*S4g&1AQe*YdYcbL(laG+0lg zKqkOzO6*m$>#h6~pm$I*6<(3drPVApi(Eetq-Qp>Y8A=!H%$z&OQipZNG}-4O{j+b6-GR$#3@~`( a4;F9yvkxvI-btnI;d7%EF9_Gt_;wC&V5E)! literal 0 HcmV?d00001 diff --git a/R/Deltakl.r b/R/Deltakl.r index 9dff471..ed67e8f 100644 --- a/R/Deltakl.r +++ b/R/Deltakl.r @@ -33,11 +33,13 @@ #' @seealso \code{\link{Pik}}, \code{\link{Pikl}}, \code{\link{VarHT}} #' #' @examples -#' N <- 5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) #' n <- 2 #' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -#' Delta <- Deltakl(N, n, p) -#' Delta +#' sum(p) +#' # Variance-Covariance matrix of the sample membership indicators +#' Deltakl(N, n, p) Deltakl <- function(N, n, p) { Ind <- Ik(N, n) diff --git a/R/Domains.r b/R/Domains.r index 6feb92d..d06f853 100644 --- a/R/Domains.r +++ b/R/Domains.r @@ -29,15 +29,47 @@ #' @seealso \code{\link{E.SI}}, \code{\link{E.STSI}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # This domain contains only two categories: "yes" and "no" +#' x <- as.factor(c("yes","yes","yes","no","no","no","no","yes","yes")) +#' Domains(x) +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a random sample of units according +#' # to a SI design +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' sam <- S.SI(N, n) -#' # Level has 3 domains: Small, Medium, Big -#' dom <- Domains(Level[sam]) -#' head(dom) -#' colSums(dom) # sample sizes per domain +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- sample(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # The variable SPAM is a domain of interest +#' Doma <- Domains(SPAM) +#' Doma +#' # HT estimation of the absolute domain size for every category in the domain +#' # of interest +#' E.SI(N,n,Doma) +#' +#' ############ +#' ## Example 3 +#' ############ +#' # Following with Example 2... +#' # The variables of interest are: Income, Employees and Taxes +#' # This function allows to estimate the population total of this variables for every +#' # category in the domain of interest SPAM +#' estima <- data.frame(Income, Employees, Taxes) +#' SPAM.no <- estima*Doma[,1] +#' SPAM.yes <- estima*Doma[,2] +#' E.SI(N,n,SPAM.no) +#' E.SI(N,n,SPAM.yes) Domains <- function(y) { y <- as.factor(y) diff --git a/R/E.1SI.R b/R/E.1SI.R index 28a7d9e..6cb483a 100644 --- a/R/E.1SI.R +++ b/R/E.1SI.R @@ -62,7 +62,7 @@ E.1SI <- function(NI, nI, y, PSU) { Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) rownames(Total) = c("Estimation", "Standard Error", "CVE", - "DEFF") + "DEFF") colnames(Total) <- names(y) fI <- nI/NI @@ -79,6 +79,4 @@ E.1SI <- function(NI, nI, y, PSU) { Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } return(Total) -} - - +} \ No newline at end of file diff --git a/R/E.2SI.r b/R/E.2SI.r index 69dea93..c44b522 100644 --- a/R/E.2SI.r +++ b/R/E.2SI.r @@ -39,30 +39,119 @@ #' @seealso \code{\link{E.1SI}}, \code{\link{E.UC}} #' #' @examples -#' library(TeachingSampling) -#' data('BigCity') -#' library(dplyr) -#' Households <- BigCity %>% -#' group_by(HHID) %>% -#' summarise(PSU = unique(PSU), -#' Persons = n(), -#' Income = sum(Income), -#' Expenditure = sum(Expenditure)) -#' -#' UI <- levels(as.factor(Households$PSU)) +#' ############ +#' ## Example 1 +#' ############ +#' # Uses Lucy data to draw a twostage simple random sample +#' # accordind to a 2SI design. Zone is the clustering variable +#' data(Lucy) +#' attach(Lucy) +#' summary(Zone) +#' # The population of clusters or Primary Sampling Units +#' UI<-c("A","B","C","D","E") #' NI <- length(UI) -#' nI <- 10 -#' samI <- S.SI(NI, nI) -#' sampleI <- UI[samI] -#' CityI <- Households[Households$PSU %in% sampleI, ] -#' -#' Ni <- as.numeric(table(CityI$PSU)) -#' ni <- ceiling(Ni * 0.2) +#' # The sample size is nI=3 +#' nI <- 3 +#' # Selects the sample of PSUs +#' samI<-S.SI(NI,nI) +#' dataI<-UI[samI] +#' dataI +#' # The sampling frame of Secondary Sampling Unit is saved in Lucy1 ... Lucy3 +#' Lucy1<-Lucy[which(Zone==dataI[1]),] +#' Lucy2<-Lucy[which(Zone==dataI[2]),] +#' Lucy3<-Lucy[which(Zone==dataI[3]),] +#' # The size of every single PSU +#' N1<-dim(Lucy1)[1] +#' N2<-dim(Lucy2)[1] +#' N3<-dim(Lucy3)[1] +#' Ni<-c(N1,N2,N3) +#' # The sample size in every PSI is 135 Secondary Sampling Units +#' n1<-135 +#' n2<-135 +#' n3<-135 +#' ni<-c(n1,n2,n3) +#' # Selects a sample of Secondary Sampling Units inside the PSUs +#' sam1<-S.SI(N1,n1) +#' sam2<-S.SI(N2,n2) +#' sam3<-S.SI(N3,n3) +#' # The information about each Secondary Sampling Unit in the PSUs +#' # is saved in data1 ... data3 +#' data1<-Lucy1[sam1,] +#' data2<-Lucy2[sam2,] +#' data3<-Lucy3[sam3,] +#' # The information about each unit in the final selected sample is saved in data +#' data<-rbind(data1, data2, data3) +#' attach(data) +#' # The clustering variable is Zone +#' Cluster <- as.factor(as.integer(Zone)) +#' # The variables of interest are: Income, Employees and Taxes +#' # This information is stored in a data frame called estima +#' estima <- data.frame(Income, Employees, Taxes) +#' # Estimation of the Population total +#' E.2SI(NI,nI,Ni,ni,estima,Cluster) #' -#' estima <- data.frame(CityI$Persons, CityI$Income, CityI$Expenditure) -#' area <- as.factor(CityI$PSU) -#' -#' E.2SI(NI, nI, Ni, ni, estima, area) +#' ######################################################## +#' ## Example 2 Total Census to the entire population +#' ######################################################## +#' # Uses Lucy data to draw a cluster random sample +#' # accordind to a SI design ... +#' # Zone is the clustering variable +#' data(Lucy) +#' attach(Lucy) +#' summary(Zone) +#' # The population of clusters +#' UI<-c("A","B","C","D","E") +#' NI <- length(UI) +#' # The sample size equals to the population size of PSU +#' nI <- NI +#' # Selects every single PSU +#' samI<-S.SI(NI,nI) +#' dataI<-UI[samI] +#' dataI +#' # The sampling frame of Secondary Sampling Unit is saved in Lucy1 ... Lucy5 +#' Lucy1<-Lucy[which(Zone==dataI[1]),] +#' Lucy2<-Lucy[which(Zone==dataI[2]),] +#' Lucy3<-Lucy[which(Zone==dataI[3]),] +#' Lucy4<-Lucy[which(Zone==dataI[4]),] +#' Lucy5<-Lucy[which(Zone==dataI[5]),] +#' # The size of every single PSU +#' N1<-dim(Lucy1)[1] +#' N2<-dim(Lucy2)[1] +#' N3<-dim(Lucy3)[1] +#' N4<-dim(Lucy4)[1] +#' N5<-dim(Lucy5)[1] +#' Ni<-c(N1,N2,N3,N4,N5) +#' # The sample size of Secondary Sampling Units equals to the size of each PSU +#' n1<-N1 +#' n2<-N2 +#' n3<-N3 +#' n4<-N4 +#' n5<-N5 +#' ni<-c(n1,n2,n3,n4,n5) +#' # Selects every single Secondary Sampling Unit inside the PSU +#' sam1<-S.SI(N1,n1) +#' sam2<-S.SI(N2,n2) +#' sam3<-S.SI(N3,n3) +#' sam4<-S.SI(N4,n4) +#' sam5<-S.SI(N5,n5) +#' # The information about each unit in the cluster is saved in Lucy1 ... Lucy5 +#' data1<-Lucy1[sam1,] +#' data2<-Lucy2[sam2,] +#' data3<-Lucy3[sam3,] +#' data4<-Lucy4[sam4,] +#' data5<-Lucy5[sam5,] +#' # The information about each Secondary Sampling Unit +#' # in the sample (census) is saved in data +#' data<-rbind(data1, data2, data3, data4, data5) +#' attach(data) +#' # The clustering variable is Zone +#' Cluster <- as.factor(as.integer(Zone)) +#' # The variables of interest are: Income, Employees and Taxes +#' # This information is stored in a data frame called estima +#' estima <- data.frame(Income, Employees, Taxes) +#' # Estimation of the Population total +#' E.2SI(NI,nI,Ni,ni,estima,Cluster) +#' # Sampling error is null E.2SI <- function(NI, nI, Ni, ni, y, PSU) { y <- cbind(1, y) diff --git a/R/E.Beta.r b/R/E.Beta.r index 77ce598..9cc862c 100644 --- a/R/E.Beta.r +++ b/R/E.Beta.r @@ -40,14 +40,88 @@ #' @seealso \code{\link{GREG.SI}}, \code{\link{E.SI}} #' #' @examples -#' data('Lucy') +#' ###################################################################### +#' ## Example 1: Linear models involving continuous auxiliary information +#' ###################################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) +#' +#' N <- dim(Lucy)[1] #' n <- 400 #' sam <- S.SI(N, n) -#' y <- data.frame(Income = Income[sam]) -#' x <- data.frame(Employees = Employees[sam]) -#' E.Beta(N, n, y, x, b0 = TRUE) +#' # The information about the units in the sample +#' # is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' +#' ########### common mean model +#' +#' estima<-data.frame(Income, Employees, Taxes) +#' x <- rep(1,n) +#' E.Beta(N, n, estima,x,ck=1,b0=FALSE) +#' +#' +#' ########### common ratio model +#' +#' estima<-data.frame(Income) +#' x <- data.frame(Employees) +#' E.Beta(N, n, estima,x,ck=x,b0=FALSE) +#' +#' ########### Simple regression model without intercept +#' +#' estima<-data.frame(Income, Employees) +#' x <- data.frame(Taxes) +#' E.Beta(N, n, estima,x,ck=1,b0=FALSE) +#' +#' ########### Multiple regression model without intercept +#' +#' estima<-data.frame(Income) +#' x <- data.frame(Employees, Taxes) +#' E.Beta(N, n, estima,x,ck=1,b0=FALSE) +#' +#' ########### Simple regression model with intercept +#' +#' estima<-data.frame(Income, Employees) +#' x <- data.frame(Taxes) +#' E.Beta(N, n, estima,x,ck=1,b0=TRUE) +#' +#' ########### Multiple regression model with intercept +#' +#' estima<-data.frame(Income) +#' x <- data.frame(Employees, Taxes) +#' E.Beta(N, n, estima,x,ck=1,b0=TRUE) +#' +#' ############################################################### +#' ## Example 2: Linear models with discrete auxiliary information +#' ############################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) +#' attach(Lucy) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- S.SI(N,n) +#' # The information about the sample units is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # The auxiliary information +#' Doma<-Domains(Level) +#' +#' ########### Poststratified common mean model +#' +#' estima<-data.frame(Income, Employees, Taxes) +#' E.Beta(N, n, estima,Doma,ck=1,b0=FALSE) +#' +#' ########### Poststratified common ratio model +#' +#' estima<-data.frame(Income, Employees) +#' x<-Doma*Taxes +#' E.Beta(N, n, estima,x,ck=1,b0=FALSE) E.Beta <- function(N, n, y, x, ck = 1, b0 = FALSE) { if (b0 == TRUE) { diff --git a/R/E.Quantile.r b/R/E.Quantile.r index cc234d6..48c580b 100644 --- a/R/E.Quantile.r +++ b/R/E.Quantile.r @@ -32,18 +32,32 @@ #' @seealso \code{\link{E.SI}}, \code{\link{E.piPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' y <- c(32, 34, 46, 89, 35) +#' x <- c(52, 60, 75, 100, 50) +#' z <- cbind(y, x) +#' Pik <- c(0.58, 0.34, 0.48, 0.33, 0.27) +#' E.Quantile(y, 0.5) +#' E.Quantile(x, 0.25) +#' E.Quantile(z, 0.75) +#' E.Quantile(z, 0.5, Pik) +#' ############ +#' ## Example 2 +#' ############ +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' sam <- S.SI(N, n) -#' Pik <- rep(n/N, n) -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' -#' # Median -#' E.Quantile(y, Qn = 0.5, Pik = Pik) -#' # First quartile -#' E.Quantile(y, Qn = 0.25, Pik = Pik) +#' m <- 400 +#' res <- S.PPS(m, Income) +#' sam <- res[, 1] +#' pk.s <- res[, 2] +#' Pik.s <- 1 - (1 - pk.s)^m +#' data <- Lucy[sam, ] +#' attach(data) +#' estima <- data.frame(Income, Employees, Taxes) +#' E.Quantile(estima, 0.5, Pik.s) E.Quantile <- function(y, Qn, Pik) { y <- as.data.frame(y) diff --git a/R/E.SI.r b/R/E.SI.r index 54d0be1..dcfa53f 100644 --- a/R/E.SI.r +++ b/R/E.SI.r @@ -34,13 +34,89 @@ #' @seealso \code{\link{S.SI}}, \code{\link{E.STSI}}, \code{\link{GREG.SI}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Uses the Lucy data to draw a random sample of units according to a SI design +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' sam <- S.SI(N, n) -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.SI(N, n, y) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- S.SI(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # The variables of interest are: Income, Employees and Taxes +#' # This information is stored in a data frame called estima +#' estima <- data.frame(Income, Employees, Taxes) +#' E.SI(N,n,estima) +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Following with Example 1. The variable SPAM is a domain of interest +#' Doma <- Domains(SPAM) +#' # This function allows to estimate the size of each domain in SPAM +#' estima <- data.frame(Doma) +#' E.SI(N,n,Doma) +#' +#' ############ +#' ## Example 3 +#' ############ +#' # Following with Example 1. The variable SPAM is a domain of interest +#' Doma <- Domains(SPAM) +#' # This function allows to estimate the parameters of the variables of interest +#' # for every category in the domain SPAM +#' estima <- data.frame(Income, Employees, Taxes) +#' SPAM.no <- cbind(Doma[,1], estima*Doma[,1]) +#' SPAM.yes <- cbind(Doma[,1], estima*Doma[,2]) +#' # Before running the following lines, notice that: +#' # The first column always indicates the population size +#' # The second column is an estimate of the size of the category in the domain SPAM +#' # The remaining columns estimates the parameters of interest +#' # within the corresponding category in the domain SPAM +#' E.SI(N,n,SPAM.no) +#' E.SI(N,n,SPAM.yes) +#' +#' ############ +#' ## Example 4 +#' ############ +#' # Following with Example 1. The variable SPAM is a domain of interest +#' # and the variable ISO is a populational subgroup of interest +#' Doma <- Domains(SPAM) +#' estima <- Domains(Zone) +#' # Before running the following lines, notice that: +#' # The first column indicates wheter the unit +#' # belongs to the first category of SPAM or not +#' # The remaining columns indicates wheter the unit +#' # belogns to the categories of Zone +#' SPAM.no <- data.frame(SpamNO=Doma[,1], Zones=estima*Doma[,1]) +#' # Before running the following lines, notice that: +#' # The first column indicates wheter the unit +#' # belongs to the second category of SPAM or not +#' # The remaining columns indicates wheter the unit +#' # belogns to the categories of Zone +#' SPAM.yes <- data.frame(SpamYES=Doma[,2], Zones=estima*Doma[,2]) +#' # Before running the following lines, notice that: +#' # The first column always indicates the population size +#' # The second column is an estimate of the size of the +#' # first category in the domain SPAM +#' # The remaining columns estimates the size of the categories +#' # of Zone within the corresponding category of SPAM +#' # Finnaly, note that the sum of the point estimates of the last +#' # two columns gives exactly the point estimate in the second column +#' E.SI(N,n,SPAM.no) +#' # Before running the following lines, notice that: +#' # The first column always indicates the population size +#' # The second column is an estimate of the size of the +#' # second category in the domain SPAM +#' # The remaining columns estimates the size of the categories +#' # of Zone within the corresponding category of SPAM +#' # Finnaly, note that the sum of the point estimates of the last two +#' # columns gives exactly the point estimate in the second column +#' E.SI(N,n,SPAM.yes) E.SI <- function(N, n, y) { y <- cbind(1, y) diff --git a/R/E.STPPS.r b/R/E.STPPS.r index 3eae683..72ad038 100644 --- a/R/E.STPPS.r +++ b/R/E.STPPS.r @@ -1,45 +1,73 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Stratified PPS With-Replacement Sampling +#' @description +#' Computes the Hansen-Hurwitz estimator of the population total under a +#' stratified PPS with-replacement (STPPS) sampling design. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame of variables of interest. +#' @param pk Vector of selection probabilities for each draw in the sample. +#' @param mh Integer vector with the number of draws within each stratum. +#' @param S Vector identifying the stratum membership of each unit in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.STPPS}}, \code{\link{E.PPS}}, \code{\link{E.STpiPS}} +#' +#' @examples +#' # Uses the Lucy data to draw a stratified random sample +#' # according to a PPS design in each stratum +#' data(Lucy) +#' attach(Lucy) +#' m1 <- 83; m2 <- 100; m3 <- 200 +#' mh <- c(m1, m2, m3) +#' res <- S.STPPS(Level, Income, mh) +#' sam <- res[, 1] +#' pk <- res[, 2] +#' data <- Lucy[sam, ] +#' attach(data) +#' estima <- data.frame(Income, Employees, Taxes) +#' E.STPPS(estima, pk, mh, Level) -E.STPPS<-function(y,pk,mh,S){ - S<-as.factor(S) - y<-cbind(1,y) - y<-as.data.frame(y) +E.STPPS <- function(y, pk, mh, S) { + S <- as.factor(S) + S <- as.factor(as.integer(S)) + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - pk<-as.data.frame(pk) - - Strata<-array(NA,c(4,length(mh)+1,dim(y)[2])) - rownames(Strata)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Strata)<-c(levels(S),"Population") - dimnames(Strata)[[3]]<-names(y) - S<-as.factor(as.integer(S)) - - for(k in 1: length(mh)){ - e<-which(S==k) - ye<-y[e,] - pke<-pk[e,] - ye<-as.matrix(ye) - tye<-matrix(1,1,dim(ye)[1])%*%(ye/pke)/mh[k] - tye2<-t(matrix(tye,dim(ye)[2],mh[k])) - Vtye<-(1/mh[k])*(1/(mh[k]-1))*colSums((ye/pke-tye2)^2) - CVe<-100*sqrt(Vtye)/tye - Nh<-(1/mh[k])*sum(1/pke) - VMAS<-as.vector((Nh^2)*(1-(mh[k]/Nh))*diag(var(ye))/(mh[k])) - DEFF<-Vtye/VMAS - Strata[1,,][k,]<-tye - Strata[2,,][k,]<-sqrt(Vtye) - Strata[3,,][k,]<-CVe - Strata[4,,][k,]<-DEFF + H <- length(mh) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + N <- sum(1/pk) + n <- length(pk) + for (k in 1:dim(y)[2]) { + ty <- 0 + Vty <- 0 + for (h in 1:H) { + yh <- y[which(S == h), k] + pkh <- pk[which(S == h)] + HHh <- sum(yh/pkh)/mh[h] + ty <- ty + HHh + Vty <- Vty + (1/mh[h]) * (1/(mh[h] - 1)) * sum((yh/pkh - HHh)^2) + } + CVe <- 100 * sqrt(Vty)/ty + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } - - m=sum(mh) - - for(i in 1:dim(y)[2]){ - Strata[1,,][(length(mh)+1),][i]<-sum(Strata[,,i][1,][1:length(mh)]) - Strata[2,,][(length(mh)+1),][i]<-sqrt(sum(Strata[,,i][2,][1:length(mh)]^2)) - Strata[3,,][(length(mh)+1),][i]<-100*Strata[2,,][(length(mh)+1),][i]/Strata[1,,][(length(mh)+1),][i] - N <- Strata[1, "Population", "N"] - VMAST<-(N^2)*(1-(m/N))*var(y[,i])/(m) - Strata[4,,][(length(mh)+1),][i]<-(Strata[2,,][(length(mh)+1),][i]^2)/(VMAST) - } - return(Strata) -} + return(Total) +} \ No newline at end of file diff --git a/R/E.STSI.r b/R/E.STSI.r index 17a7634..b76e1d1 100644 --- a/R/E.STSI.r +++ b/R/E.STSI.r @@ -1,41 +1,86 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Stratified Simple Random Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' stratified simple random sampling without replacement (STSI) design. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @author Hugo Andres Gutierrez Rojas +#' @param S Vector identifying the stratum membership of each unit in the sample. +#' @param Nh Integer vector with the population size of each stratum. +#' @param nh Integer vector with the sample size of each stratum. +#' @param y Vector, matrix or data frame of variables of interest. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.STSI}}, \code{\link{E.SI}}, \code{\link{E.STpiPS}} +#' +#' @examples +#' ############ +#' ## Example 1 +#' ############ +#' data(Lucy) +#' attach(Lucy) +#' N1 <- summary(Level)[[1]] +#' N2 <- summary(Level)[[2]] +#' N3 <- summary(Level)[[3]] +#' Nh <- c(N1, N2, N3) +#' n1 <- N1; n2 <- 100; n3 <- 200 +#' nh <- c(n1, n2, n3) +#' sam <- S.STSI(Level, Nh, nh) +#' data <- Lucy[sam, ] +#' attach(data) +#' estima <- data.frame(Income, Employees, Taxes) +#' E.STSI(Level, Nh, nh, estima) +#' ############ +#' ## Example 2 +#' ############ +#' # The variable SPAM is a domain of interest +#' Doma <- Domains(SPAM) +#' SPAM.no <- estima * Doma[, 1] +#' SPAM.yes <- estima * Doma[, 2] +#' E.STSI(Level, Nh, nh, Doma) +#' E.STSI(Level, Nh, nh, SPAM.no) +#' E.STSI(Level, Nh, nh, SPAM.yes) -E.STSI<-function(S,Nh,nh,y){ - S<-as.factor(S) - y<-cbind(1,y) - y<-as.data.frame(y) +E.STSI <- function(S, Nh, nh, y) { + S <- as.factor(S) + S <- as.factor(as.integer(S)) + y <- cbind(1, y) + y <- as.data.frame(y) names(y)[1] <- "N" - - Strata<-array(NA,c(4,length(nh)+1,dim(y)[2])) - rownames(Strata)=c("Estimation", "Standard Error","CVE","DEFF") - colnames(Strata)<-c(levels(S),"Population") - dimnames(Strata)[[3]]<-names(y) - S<-as.factor(as.integer(S)) - - for(k in 1: length(nh)){ - e<-which(S==k) - ye<-y[e,] - ye<-as.matrix(ye) - tye<-matrix(1,1,dim(ye)[1])%*%(ye*(Nh[k]/nh[k])) - Vtye<-diag((Nh[k]^2)*(1-(nh[k]/Nh[k]))*var(ye)/(nh[k])) - CVe<-100*sqrt(Vtye)/tye - VMAS<-diag((Nh[k]^2)*(1-(nh[k]/Nh[k]))*var(ye)/(nh[k])) - DEFF<-Vtye/VMAS - Strata[1,,][k,]<-tye - Strata[2,,][k,]<-sqrt(Vtye) - Strata[3,,][k,]<-CVe - Strata[4,,][k,]<-DEFF + H <- length(Nh) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + N <- sum(Nh) + n <- sum(nh) + fh <- nh/Nh + wh <- Nh/N + for (k in 1:dim(y)[2]) { + ty <- 0 + Vty <- 0 + for (h in 1:H) { + yh <- y[which(S == h), k] + ty <- ty + Nh[h] * mean(yh) + Vty <- Vty + Nh[h]^2 * (1 - fh[h]) * var(yh)/nh[h] + } + CVe <- 100 * sqrt(Vty)/ty + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } - - N=sum(Nh) - n=sum(nh) - - for(i in 1:dim(y)[2]){ - Strata[1,,][(length(nh)+1),][i]<-sum(Strata[,,i][1,][1:length(nh)]) - Strata[2,,][(length(nh)+1),][i]<-sqrt(sum(Strata[,,i][2,][1:length(nh)]^2)) - Strata[3,,][(length(nh)+1),][i]<-100*(Strata[2,,][(length(nh)+1),][i])/Strata[1,,][(length(nh)+1),][i] - VMAST<-(N^2)*(1-(n/N))*var(y[,i])/(n) - Strata[4,,][(length(nh)+1),][i]<-(Strata[2,,][(length(nh)+1),][i])^2/(VMAST) - } - return(Strata) + return(Total) } \ No newline at end of file diff --git a/R/E.STpiPS.R b/R/E.STpiPS.R index bca7604..1dbc750 100644 --- a/R/E.STpiPS.R +++ b/R/E.STpiPS.R @@ -1,56 +1,83 @@ #' @export +#' +#' @title +#' Estimation of the Population Total under Stratified piPS Sampling +#' @description +#' Computes the Horvitz-Thompson estimator of the population total under a +#' stratified without-replacement probability proportional to size (piPS) +#' sampling design. +#' @return +#' A matrix with four rows and one column per variable of interest: +#' \itemize{ +#' \item \code{Estimation}: Estimated population total. +#' \item \code{Standard Error}: Estimated standard error. +#' \item \code{CVE}: Estimated coefficient of variation (in percentage). +#' \item \code{DEFF}: Design effect with respect to simple random sampling. +#' } +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector, matrix or data frame of variables of interest. +#' @param Pik Vector of first-order inclusion probabilities for each unit +#' in the sample. +#' @param S Vector identifying the stratum membership of each unit in the sample. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{S.STpiPS}}, \code{\link{E.piPS}}, \code{\link{E.STSI}} +#' +#' @examples +#' # Uses the Lucy data to draw a stratified random sample +#' # according to a piPS design in each stratum +#' data(Lucy) +#' attach(Lucy) +#' N1 <- summary(Level)[[1]] +#' N2 <- summary(Level)[[2]] +#' N3 <- summary(Level)[[3]] +#' nh <- c(N1, 100, 200) +#' S <- Level +#' x <- Employees +#' res <- S.STpiPS(S, x, nh) +#' sam <- res[, 1] +#' pik <- res[, 2] +#' data <- Lucy[sam, ] +#' attach(data) +#' estima <- data.frame(Income, Employees, Taxes) +#' E.STpiPS(estima, pik, Level) -E.STpiPS <- function(y, pik, S) { +E.STpiPS <- function(y, Pik, S) { S <- as.factor(S) + S <- as.factor(as.integer(S)) y <- cbind(1, y) y <- as.data.frame(y) names(y)[1] <- "N" - pik <- as.data.frame(pik) - nh <- c(table(S)) - - Strata <- array(NA, c(4, length(nh) + 1, dim(y)[2])) - rownames(Strata) = c("Estimation", "Standard Error", "CVE", "DEFF") - colnames(Strata) <- c(levels(S), "Population") - dimnames(Strata)[[3]] <- names(y) - S <- as.factor(as.integer(S)) - - for (k in 1:length(nh)) { - nhe <- nh[k] - e <- which(S == k) - ye <- y[e, ] - pike <- pik[e, ] - ye <- as.matrix(ye) - tye <- matrix(1, 1, dim(ye)[1]) %*% (ye/pike) - #------------------- - ck <- (1 - pike) * (nhe/(nhe - 1)) - P1 <- as.matrix(colSums(ck * ye/pike)) - P2 <- sum(ck) - ystar <- t(P1 %*% t(pike/P2)) - P3 <- ck/(pike^2) - #-------------------- - if(sum(pike) == nhe){ - Vtye <- rep(0, times = dim(P1)[1]) - } else { - Vtye <- as.vector(colSums(P3 * ((ye - ystar)^2))) - } - CVe <- 100 * sqrt(Vtye)/tye - Nhe <- sum(1/pike) - VMAS <- as.vector((Nhe^2) * (1 - (nhe/Nhe)) * diag(var(ye))/(nhe)) - DEFF <- Vtye/VMAS - Strata[1, , ][k, ] <- tye - Strata[2, , ][k, ] <- sqrt(Vtye) - Strata[3, , ][k, ] <- CVe - Strata[4, , ][k, ] <- DEFF - } - - for (i in 1:dim(y)[2]) { - Strata[1, , ][(length(nh) + 1), ][i] <- sum(Strata[, , i][1, ][1:length(nh)]) - Strata[2, , ][(length(nh) + 1), ][i] <- sqrt(sum(Strata[, , i][2, ][1:length(nh)]^2)) - Strata[3, , ][(length(nh) + 1), ][i] <- 100 * Strata[2, , ][(length(nh) + 1), ][i]/Strata[1, , ][(length(nh) + 1), ][i] - N <- sum(1/pik) - n <- sum(nh) - VMAST <- (N^2) * (1 - (n/N)) * var(y[, i])/(n) - Strata[4, , ][(length(nh) + 1), ][i] <- (Strata[2, , ][(length(nh) + 1), ][i]^2)/(VMAST) + H <- length(levels(S)) + Total <- matrix(NA, nrow = 4, ncol = dim(y)[2]) + rownames(Total) <- c("Estimation", "Standard Error", "CVE", "DEFF") + colnames(Total) <- names(y) + n <- length(Pik) + N <- sum(1/Pik) + for (k in 1:dim(y)[2]) { + ty <- 0 + Vty <- 0 + for (h in 1:H) { + yh <- y[which(S == h), k] + pikh <- Pik[which(S == h)] + nh <- length(pikh) + ck <- (1 - pikh) * (nh/(nh - 1)) + P1 <- sum(ck * yh/pikh) + P2 <- sum(ck) + ystar <- pikh * P1/P2 + P3 <- ck/(pikh^2) + Vty <- Vty + sum(P3 * ((yh - ystar)^2)) + ty <- ty + sum(yh/pikh) + } + CVe <- 100 * sqrt(Vty)/ty + VMAS <- (N^2) * (1 - (n/N)) * var(y[, k])/(n) + DEFF <- Vty/VMAS + Total[, k] <- c(ty, sqrt(Vty), CVe, DEFF) } - return(Strata) -} + return(Total) +} \ No newline at end of file diff --git a/R/GREG.SI.r b/R/GREG.SI.r index 9db7e37..5f54082 100644 --- a/R/GREG.SI.r +++ b/R/GREG.SI.r @@ -40,16 +40,155 @@ #' @seealso \code{\link{E.Beta}}, \code{\link{E.SI}}, \code{\link{Wk}} #' #' @examples -#' data('Lucy') +#' ###################################################################### +#' ## Example 1: Linear models involving continuous auxiliary information +#' ###################################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' sam <- S.SI(N, n) -#' y <- data.frame(Income = Income[sam]) -#' x <- data.frame(Employees = Employees[sam]) -#' tx <- sum(Employees) -#' b <- E.Beta(N, n, y, x, b0 = FALSE) -#' GREG.SI(N, n, y, x, tx, b) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- S.SI(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' +#' ########### common mean model +#' +#' estima<-data.frame(Income, Employees, Taxes) +#' x <- rep(1,n) +#' model <- E.Beta(N, n, estima, x, ck=1,b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- c(N) +#' GREG.SI(N,n,estima,x,tx, b, b0=FALSE) +#' +#' ########### common ratio model +#' +#' estima<-data.frame(Income) +#' x <- data.frame(Employees) +#' model <- E.Beta(N, n, estima, x, ck=x,b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- sum(Lucy$Employees) +#' GREG.SI(N,n,estima,x,tx, b, b0=FALSE) +#' +#' ########### Simple regression model without intercept +#' +#' estima<-data.frame(Income, Employees) +#' x <- data.frame(Taxes) +#' model <- E.Beta(N, n, estima, x, ck=1,b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- sum(Lucy$Taxes) +#' GREG.SI(N,n,estima,x,tx, b, b0=FALSE) +#' +#' ########### Multiple regression model without intercept +#' +#' estima<-data.frame(Income) +#' x <- data.frame(Employees, Taxes) +#' model <- E.Beta(N, n, estima, x, ck=1, b0=FALSE) +#' b <- as.matrix(model[1,,]) +#' tx <- c(sum(Lucy$Employees), sum(Lucy$Taxes)) +#' GREG.SI(N,n,estima,x,tx, b, b0=FALSE) +#' +#' ########### Simple regression model with intercept +#' +#' estima<-data.frame(Income, Employees) +#' x <- data.frame(Taxes) +#' model <- E.Beta(N, n, estima, x, ck=1,b0=TRUE) +#' b <- as.matrix(model[1,,]) +#' tx <- c(N, sum(Lucy$Taxes)) +#' GREG.SI(N,n,estima,x,tx, b, b0=TRUE) +#' +#' ########### Multiple regression model with intercept +#' +#' estima<-data.frame(Income) +#' x <- data.frame(Employees, Taxes) +#' model <- E.Beta(N, n, estima, x, ck=1,b0=TRUE) +#' b <- as.matrix(model[1,,]) +#' tx <- c(N, sum(Lucy$Employees), sum(Lucy$Taxes)) +#' GREG.SI(N,n,estima,x,tx, b, b0=TRUE) +#' +#' #################################################################### +#' ## Example 2: Linear models with discrete auxiliary information +#' #################################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- S.SI(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' +#' # The auxiliary information is discrete type +#' Doma<-Domains(Level) +#' +#' ########### Poststratified common mean model +#' +#' estima<-data.frame(Income, Employees, Taxes) +#' model <- E.Beta(N, n, estima, Doma, ck=1,b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- colSums(Domains(Lucy$Level)) +#' GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) +#' +#' ########### Poststratified common ratio model +#' +#' estima<-data.frame(Income, Employees) +#' x <- Doma*Taxes +#' model <- E.Beta(N, n, estima, x ,ck=1,b0=FALSE) +#' b <- as.matrix(model[1,,]) +#' tx <- colSums(Domains(Lucy$Level)*Lucy$Taxes) +#' GREG.SI(N,n,estima,x,tx, b, b0=FALSE) +#' +#' ###################################################################### +#' ## Example 3: Domains estimation trough the postestratified estimator +#' ###################################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- S.SI(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' +#' # The auxiliary information is discrete type +#' Doma<-Domains(Level) +#' +#' ########### Poststratified common mean model for the +#' # Income total in each poststratum ################### +#' +#' estima<-Doma*Income +#' model <- E.Beta(N, n, estima, Doma, ck=1, b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- colSums(Domains(Lucy$Level)) +#' GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) +#' +#' ########### Poststratified common mean model for the +#' # Employees total in each poststratum ################### +#' +#' estima<-Doma*Employees +#' model <- E.Beta(N, n, estima, Doma, ck=1,b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- colSums(Domains(Lucy$Level)) +#' GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) +#' +#' ########### Poststratified common mean model for the +#' # Taxes total in each poststratum ################### +#' +#' estima<-Doma*Taxes +#' model <- E.Beta(N, n, estima, Doma, ck=1, b0=FALSE) +#' b <- t(as.matrix(model[1,,])) +#' tx <- colSums(Domains(Lucy$Level)) +#' GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) GREG.SI <- function(N, n, y, x, tx, b, b0 = FALSE) { y <- as.data.frame(y) diff --git a/R/HH.r b/R/HH.r index 0a32da5..218577e 100644 --- a/R/HH.r +++ b/R/HH.r @@ -1,17 +1,140 @@ #' @export +#' +#' @title +#' Hansen-Hurwitz Estimator of the Population Total +#' @description +#' Computes the Hansen-Hurwitz (HH) estimator of the population total under +#' a with-replacement sampling design, given the sample observations and +#' their selection probabilities. +#' @return +#' A numeric vector or matrix with the estimated total for each variable +#' of interest. +#' @details +#' The Hansen-Hurwitz estimator is: +#' \deqn{\hat{t}_{HH} = \frac{1}{m}\sum_{i=1}^m \frac{y_i}{p_i}} +#' where \eqn{p_i} is the selection probability of the \eqn{i}-th draw +#' and \eqn{m} is the number of draws. This estimator is design-unbiased +#' under any with-replacement sampling design. +#' @author Hugo Andres Gutierrez Rojas +#' @param y Vector or matrix of values of the variable(s) of interest for +#' units in the sample (with possible repetitions). +#' @param pk Vector of selection probabilities for each draw in the sample. +#' +#' @references +#' Hansen, M.H. and Hurwitz, W.N. (1943). On the theory of sampling from +#' finite populations. \emph{Annals of Mathematical Statistics}, 14, 333-362.\cr +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer. +#' +#' @seealso \code{\link{E.PPS}}, \code{\link{HT}}, \code{\link{S.PPS}} +#' +#' @examples +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vectors y1 and y2 give the values of the variables of interest +#' y1<-c(32, 34, 46, 89, 35) +#' y2<-c(1,1,1,0,0) +#' y3<-cbind(y1,y2) +#' # The population size is N=5 +#' N <- length(U) +#' # The sample size is m=2 +#' m <- 2 +#' # pk is the probability of selection of every single unit +#' pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) +#' # Selection of a random sample with replacement +#' sam <- sample(5,2, replace=TRUE, prob=pk) +#' # The selected sample is +#' U[sam] +#' # The values of the variables of interest for the units in the sample +#' y1[sam] +#' y2[sam] +#' y3[sam,] +#' # The Hansen-Hurwitz estimator +#' HH(y1[sam],pk[sam]) +#' HH(y2[sam],pk[sam]) +#' HH(y3[sam,],pk[sam]) +#' +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a simple random sample with replacement +#' data(Lucy) +#' attach(Lucy) +#' +#' N <- dim(Lucy)[1] +#' m <- 400 +#' sam <- sample(N,m,replace=TRUE) +#' # The vector of selection probabilities of units in the sample +#' pk <- rep(1/N,m) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # The variables of interest are: Income, Employees and Taxes +#' # This information is stored in a data frame called estima +#' estima <- data.frame(Income, Employees, Taxes) +#' HH(estima, pk) +#' +#' ################################################################ +#' ## Example 3 HH is unbiased for with replacement sampling designs +#' ################################################################ +#' +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector y1 and y2 are the values of the variables of interest +#' y<-c(32, 34, 46, 89, 35) +#' # The population size is N=5 +#' N <- length(U) +#' # The sample size is m=2 +#' m <- 2 +#' # pk is the probability of selection of every single unit +#' pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) +#' # p is the probability of selection of every possible sample +#' p <- p.WR(N,m,pk) +#' p +#' sum(p) +#' # The sample membership matrix for random size without replacement sampling designs +#' Ind <- nk(N,m) +#' Ind +#' # The support with the values of the elements +#' Qy <- SupportWR(N,m, ID=y) +#' Qy +#' # The support with the values of the elements +#' Qp <- SupportWR(N,m, ID=pk) +#' Qp +#' # The HT estimates for every single sample in the support +#' HH1 <- HH(Qy[1,], Qp[1,])[1,] +#' HH2 <- HH(Qy[2,], Qp[2,])[1,] +#' HH3 <- HH(Qy[3,], Qp[3,])[1,] +#' HH4 <- HH(Qy[4,], Qp[4,])[1,] +#' HH5 <- HH(Qy[5,], Qp[5,])[1,] +#' HH6 <- HH(Qy[6,], Qp[6,])[1,] +#' HH7 <- HH(Qy[7,], Qp[7,])[1,] +#' HH8 <- HH(Qy[8,], Qp[8,])[1,] +#' HH9 <- HH(Qy[9,], Qp[9,])[1,] +#' HH10 <- HH(Qy[10,], Qp[10,])[1,] +#' HH11 <- HH(Qy[11,], Qp[11,])[1,] +#' HH12 <- HH(Qy[12,], Qp[12,])[1,] +#' HH13 <- HH(Qy[13,], Qp[13,])[1,] +#' HH14 <- HH(Qy[14,], Qp[14,])[1,] +#' HH15 <- HH(Qy[15,], Qp[15,])[1,] +#' # The HT estimates arranged in a vector +#' Est <- c(HH1, HH2, HH3, HH4, HH5, HH6, HH7, HH8, HH9, HH10, HH11, HH12, HH13, +#' HH14, HH15) +#' Est +#' # The HT is actually desgn-unbiased +#' data.frame(Ind, Est, p) +#' sum(Est*p) +#' sum(y) -HH <- function(y,pk){ - y <- as.data.frame(y) - m <- length(pk) - Total <- matrix(NA,nrow=3,ncol=dim(y)[2]) - rownames(Total)=c("Estimation", "Standard Error","CVE") - colnames(Total) <- names(y) - - for(k in 1:dim(y)[2]){ - ty <- sum(y[,k]/pk)/m - Vty <- (1/m)*(1/(m-1))*sum((y[,k]/pk-ty)^2) - CVe <- 100*sqrt(Vty)/ty - Total[,k] <- c(ty,sqrt(Vty),CVe) - } - return(Total) +HH <- function(y, pk) { + y <- t(as.matrix(y)) + pk <- as.matrix(pk) + m <- length(pk) + result <- (1/m) * (y %*% (1/pk)) + result } \ No newline at end of file diff --git a/R/HT.r b/R/HT.r index 3d67c37..f595397 100644 --- a/R/HT.r +++ b/R/HT.r @@ -30,16 +30,277 @@ #' @seealso \code{\link{VarHT}}, \code{\link{E.SI}}, \code{\link{E.piPS}} #' #' @examples -#' # Population N = 5, sample size n = 2 -#' N <- 5 +#' ############ +#' ## Example 1 +#' ############ +#' # Uses the Lucy data to draw a simple random sample without replacement +#' data(Lucy) +#' attach(Lucy) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- sample(N,n) +#' # The vector of inclusion probabilities for each unit in the sample +#' pik <- rep(n/N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # The variables of interest are: Income, Employees and Taxes +#' # This information is stored in a data frame called estima +#' estima <- data.frame(Income, Employees, Taxes) +#' HT(estima, pik) +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a simple random sample with replacement +#' data(Lucy) +#' +#' N <- dim(Lucy)[1] +#' m <- 400 +#' sam <- sample(N,m,replace=TRUE) +#' # The vector of selection probabilities of units in the sample +#' pk <- rep(1/N,m) +#' # Computation of the inclusion probabilities +#' pik <- 1-(1-pk)^m +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # The variables of interest are: Income, Employees and Taxes +#' # This information is stored in a data frame called estima +#' estima <- data.frame(Income, Employees, Taxes) +#' HT(estima, pik) +#' +#' ############ +#' ## Example 3 +#' ############ +#' # Without replacement sampling +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector y1 and y2 are the values of the variables of interest +#' y1<-c(32, 34, 46, 89, 35) +#' y2<-c(1,1,1,0,0) +#' y3<-cbind(y1,y2) +#' # The population size is N=5 +#' N <- length(U) +#' # The sample size is n=2 #' n <- 2 +#' # The sample membership matrix for fixed size without replacement sampling designs +#' Ind <- Ik(N,n) +#' # p is the probability of selection of every possible sample #' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -#' y <- c(32, 34, 46, 89, 35) -#' Ind <- Ik(N, n) -#' pik <- as.vector(Pik(p, Ind)) -#' # Select first sample (units 1 and 2) -#' sam <- c(1, 2) -#' HT(y[sam], pik[sam]) +#' # Computation of the inclusion probabilities +#' inclusion <- Pik(p, Ind) +#' # Selection of a random sample +#' sam <- sample(5,2) +#' # The selected sample +#' U[sam] +#' # The inclusion probabilities for these two units +#' inclusion[sam] +#' # The values of the variables of interest for the units in the sample +#' y1[sam] +#' y2[sam] +#' y3[sam,] +#' # The Horvitz-Thompson estimator +#' HT(y1[sam],inclusion[sam]) +#' HT(y2[sam],inclusion[sam]) +#' HT(y3[sam,],inclusion[sam]) +#' +#' ############ +#' ## Example 4 +#' ############ +#' # Following Example 3... With replacement sampling +#' # The population size is N=5 +#' N <- length(U) +#' # The sample size is m=2 +#' m <- 2 +#' # pk is the probability of selection of every single unit +#' pk <- c(0.9, 0.025, 0.025, 0.025, 0.025) +#' # Computation of the inclusion probabilities +#' pik <- 1-(1-pk)^m +#' # Selection of a random sample with replacement +#' sam <- sample(5,2, replace=TRUE, prob=pk) +#' # The selected sample +#' U[sam] +#' # The inclusion probabilities for these two units +#' inclusion[sam] +#' # The values of the variables of interest for the units in the sample +#' y1[sam] +#' y2[sam] +#' y3[sam,] +#' # The Horvitz-Thompson estimator +#' HT(y1[sam],inclusion[sam]) +#' HT(y2[sam],inclusion[sam]) +#' HT(y3[sam,],inclusion[sam]) +#' +#' #################################################################### +#' ## Example 5 HT is unbiased for without replacement sampling designs +#' ## Fixed sample size +#' #################################################################### +#' +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector y1 and y2 are the values of the variables of interest +#' y<-c(32, 34, 46, 89, 35) +#' # The population size is N=5 +#' N <- length(U) +#' # The sample size is n=2 +#' n <- 2 +#' # The sample membership matrix for fixed size without replacement sampling designs +#' Ind <- Ik(N,n) +#' Ind +#' # p is the probability of selection of every possible sample +#' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +#' sum(p) +#' # Computation of the inclusion probabilities +#' inclusion <- Pik(p, Ind) +#' inclusion +#' sum(inclusion) +#' # The support with the values of the elements +#' Qy <-Support(N,n,ID=y) +#' Qy +#' # The HT estimates for every single sample in the support +#' HT1<- HT(y[Ind[1,]==1], inclusion[Ind[1,]==1]) +#' HT2<- HT(y[Ind[2,]==1], inclusion[Ind[2,]==1]) +#' HT3<- HT(y[Ind[3,]==1], inclusion[Ind[3,]==1]) +#' HT4<- HT(y[Ind[4,]==1], inclusion[Ind[4,]==1]) +#' HT5<- HT(y[Ind[5,]==1], inclusion[Ind[5,]==1]) +#' HT6<- HT(y[Ind[6,]==1], inclusion[Ind[6,]==1]) +#' HT7<- HT(y[Ind[7,]==1], inclusion[Ind[7,]==1]) +#' HT8<- HT(y[Ind[8,]==1], inclusion[Ind[8,]==1]) +#' HT9<- HT(y[Ind[9,]==1], inclusion[Ind[9,]==1]) +#' HT10<- HT(y[Ind[10,]==1], inclusion[Ind[10,]==1]) +#' # The HT estimates arranged in a vector +#' Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10) +#' Est +#' # The HT is actually desgn-unbiased +#' data.frame(Ind, Est, p) +#' sum(Est*p) +#' sum(y) +#' +#' #################################################################### +#' ## Example 6 HT is unbiased for without replacement sampling designs +#' ## Random sample size +#' #################################################################### +#' +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector y1 and y2 are the values of the variables of interest +#' y<-c(32, 34, 46, 89, 35) +#' # The population size is N=5 +#' N <- length(U) +#' # The sample membership matrix for random size without replacement sampling designs +#' Ind <- IkRS(N) +#' Ind +#' # p is the probability of selection of every possible sample +#' p <- c(0.59049, 0.06561, 0.06561, 0.06561, 0.06561, 0.06561, 0.00729, 0.00729, +#' 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00081, +#' 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, +#' 0.00009, 0.00009, 0.00009, 0.00009, 0.00009, 0.00001) +#' sum(p) +#' # Computation of the inclusion probabilities +#' inclusion <- Pik(p, Ind) +#' inclusion +#' sum(inclusion) +#' # The support with the values of the elements +#' Qy <-SupportRS(N, ID=y) +#' Qy +#' # The HT estimates for every single sample in the support +#' HT1<- HT(y[Ind[1,]==1], inclusion[Ind[1,]==1]) +#' HT2<- HT(y[Ind[2,]==1], inclusion[Ind[2,]==1]) +#' HT3<- HT(y[Ind[3,]==1], inclusion[Ind[3,]==1]) +#' HT4<- HT(y[Ind[4,]==1], inclusion[Ind[4,]==1]) +#' HT5<- HT(y[Ind[5,]==1], inclusion[Ind[5,]==1]) +#' HT6<- HT(y[Ind[6,]==1], inclusion[Ind[6,]==1]) +#' HT7<- HT(y[Ind[7,]==1], inclusion[Ind[7,]==1]) +#' HT8<- HT(y[Ind[8,]==1], inclusion[Ind[8,]==1]) +#' HT9<- HT(y[Ind[9,]==1], inclusion[Ind[9,]==1]) +#' HT10<- HT(y[Ind[10,]==1], inclusion[Ind[10,]==1]) +#' HT11<- HT(y[Ind[11,]==1], inclusion[Ind[11,]==1]) +#' HT12<- HT(y[Ind[12,]==1], inclusion[Ind[12,]==1]) +#' HT13<- HT(y[Ind[13,]==1], inclusion[Ind[13,]==1]) +#' HT14<- HT(y[Ind[14,]==1], inclusion[Ind[14,]==1]) +#' HT15<- HT(y[Ind[15,]==1], inclusion[Ind[15,]==1]) +#' HT16<- HT(y[Ind[16,]==1], inclusion[Ind[16,]==1]) +#' HT17<- HT(y[Ind[17,]==1], inclusion[Ind[17,]==1]) +#' HT18<- HT(y[Ind[18,]==1], inclusion[Ind[18,]==1]) +#' HT19<- HT(y[Ind[19,]==1], inclusion[Ind[19,]==1]) +#' HT20<- HT(y[Ind[20,]==1], inclusion[Ind[20,]==1]) +#' HT21<- HT(y[Ind[21,]==1], inclusion[Ind[21,]==1]) +#' HT22<- HT(y[Ind[22,]==1], inclusion[Ind[22,]==1]) +#' HT23<- HT(y[Ind[23,]==1], inclusion[Ind[23,]==1]) +#' HT24<- HT(y[Ind[24,]==1], inclusion[Ind[24,]==1]) +#' HT25<- HT(y[Ind[25,]==1], inclusion[Ind[25,]==1]) +#' HT26<- HT(y[Ind[26,]==1], inclusion[Ind[26,]==1]) +#' HT27<- HT(y[Ind[27,]==1], inclusion[Ind[27,]==1]) +#' HT28<- HT(y[Ind[28,]==1], inclusion[Ind[28,]==1]) +#' HT29<- HT(y[Ind[29,]==1], inclusion[Ind[29,]==1]) +#' HT30<- HT(y[Ind[30,]==1], inclusion[Ind[30,]==1]) +#' HT31<- HT(y[Ind[31,]==1], inclusion[Ind[31,]==1]) +#' HT32<- HT(y[Ind[32,]==1], inclusion[Ind[32,]==1]) +#' # The HT estimates arranged in a vector +#' Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10, HT11, HT12, HT13, +#' HT14, HT15, HT16, HT17, HT18, HT19, HT20, HT21, HT22, HT23, HT24, HT25, HT26, +#' HT27, HT28, HT29, HT30, HT31, HT32) +#' Est +#' # The HT is actually desgn-unbiased +#' data.frame(Ind, Est, p) +#' sum(Est*p) +#' sum(y) +#' +#' ################################################################ +#' ## Example 7 HT is unbiased for with replacement sampling designs +#' ################################################################ +#' +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector y1 and y2 are the values of the variables of interest +#' y<-c(32, 34, 46, 89, 35) +#' # The population size is N=5 +#' N <- length(U) +#' # The sample size is m=2 +#' m <- 2 +#' # pk is the probability of selection of every single unit +#' pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) +#' # p is the probability of selection of every possible sample +#' p <- p.WR(N,m,pk) +#' p +#' sum(p) +#' # The sample membership matrix for random size without replacement sampling designs +#' Ind <- IkWR(N,m) +#' Ind +#' # The support with the values of the elements +#' Qy <- SupportWR(N,m, ID=y) +#' Qy +#' # Computation of the inclusion probabilities +#' pik <- 1-(1-pk)^m +#' pik +#' # The HT estimates for every single sample in the support +#' HT1 <- HT(y[Ind[1,]==1], pik[Ind[1,]==1]) +#' HT2 <- HT(y[Ind[2,]==1], pik[Ind[2,]==1]) +#' HT3 <- HT(y[Ind[3,]==1], pik[Ind[3,]==1]) +#' HT4 <- HT(y[Ind[4,]==1], pik[Ind[4,]==1]) +#' HT5 <- HT(y[Ind[5,]==1], pik[Ind[5,]==1]) +#' HT6 <- HT(y[Ind[6,]==1], pik[Ind[6,]==1]) +#' HT7 <- HT(y[Ind[7,]==1], pik[Ind[7,]==1]) +#' HT8 <- HT(y[Ind[8,]==1], pik[Ind[8,]==1]) +#' HT9 <- HT(y[Ind[9,]==1], pik[Ind[9,]==1]) +#' HT10 <- HT(y[Ind[10,]==1], pik[Ind[10,]==1]) +#' HT11 <- HT(y[Ind[11,]==1], pik[Ind[11,]==1]) +#' HT12 <- HT(y[Ind[12,]==1], pik[Ind[12,]==1]) +#' HT13 <- HT(y[Ind[13,]==1], pik[Ind[13,]==1]) +#' HT14 <- HT(y[Ind[14,]==1], pik[Ind[14,]==1]) +#' HT15 <- HT(y[Ind[15,]==1], pik[Ind[15,]==1]) +#' # The HT estimates arranged in a vector +#' Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10, HT11, HT12, HT13, +#' HT14, HT15) +#' Est +#' # The HT is actually desgn-unbiased +#' data.frame(Ind, Est, p) +#' sum(Est*p) +#' sum(y) HT <- function(y, Pik) { y <- t(as.matrix(y)) diff --git a/R/IPFP.r b/R/IPFP.r index e07ae1c..45df2e7 100644 --- a/R/IPFP.r +++ b/R/IPFP.r @@ -35,11 +35,35 @@ #' @seealso \code{\link{Domains}}, \code{\link{Wk}} #' #' @examples -#' # A 2x2 table to be raked to known marginals -#' Table <- matrix(c(10, 20, 30, 40), nrow = 2) -#' Row.knw <- c(40, 60) -#' Col.knw <- c(35, 65) -#' IPFP(Table, Col.knw, Row.knw) +#' ############ +#' ## Example 1 +#' ############ +#' Table <- matrix(c(80, 90, 10, 170, 80, 80, 150, 210, 130), 3, 3) +#' rownames(Table) <- c("a1", "a2", "a3") +#' colnames(Table) <- c("b1", "b2", "b3") +#' Col.knw <- c(150, 300, 550) +#' Row.knw <- c(430, 360, 210) +#' IPFP(Table, Col.knw, Row.knw, tol = 0.0001) +#' ############ +#' ## Example 2 +#' ############ +#' data(Lucy) +#' attach(Lucy) +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- sample(N, n) +#' data <- Lucy[sam, ] +#' attach(data) +#' Doma1 <- Domains(Level) +#' Doma2 <- Domains(SPAM) +#' SPAM.no <- Doma2[, 1] * Doma1 +#' SPAM.yes <- Doma2[, 2] * Doma1 +#' est1 <- E.SI(N, n, SPAM.no)[, 2:4] +#' est2 <- E.SI(N, n, SPAM.yes)[, 2:4] +#' Table <- cbind(est1[1, ], est2[1, ]) +#' Col.knw <- colSums(Domains(Lucy$SPAM)) +#' Row.knw <- colSums(Domains(Lucy$Level)) +#' IPFP(Table, Col.knw, Row.knw, tol = 0.0001) IPFP <- function(Table, Col.knw, Row.knw, tol = 0.0001) { Table <- as.matrix(Table) @@ -71,4 +95,4 @@ IPFP <- function(Table, Col.knw, Row.knw, tol = 0.0001) { p2 <- cbind(p1, c(Row.est, sum(Row.est))) colnames(p2)[J + 1] <- c("Row.est") return(p2) -} +} \ No newline at end of file diff --git a/R/Ik.r b/R/Ik.r index 116fd74..a8e6130 100644 --- a/R/Ik.r +++ b/R/Ik.r @@ -28,12 +28,12 @@ #' @seealso \code{\link{Pik}}, \code{\link{Pikl}}, \code{\link{Support}} #' #' @examples -#' # All possible samples of size n = 2 from N = 4 units -#' N <- 4 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) #' n <- 2 +#' # The sample membership matrix #' Ik(N, n) -#' # Number of rows equals choose(N, n) = 6 -#' nrow(Ik(N, n)) == choose(N, n) +#' # The first unit, Yves, belongs to the first four possible samples Ik <- function(N, n) { Q <- Support(N, n, ID = FALSE) diff --git a/R/IkRS.r b/R/IkRS.r index 41b4a93..a40d83b 100644 --- a/R/IkRS.r +++ b/R/IkRS.r @@ -27,10 +27,11 @@ #' @seealso \code{\link{Ik}}, \code{\link{SupportRS}} #' #' @examples -#' # Full indicator matrix for N = 3 -#' IkRS(3) -#' # Number of rows: 1 (empty) + 3 + 3 + 1 = 8 = 2^3 -#' nrow(IkRS(3)) +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) +#' # The sample membership matrix for all sample sizes +#' IkRS(N) +#' # The first sample is a null one and the last sample is a census IkRS <- function(N) { sam <- matrix(0, ncol = N, nrow = 1) diff --git a/R/IkWR.r b/R/IkWR.r index 3eee64c..752e7e7 100644 --- a/R/IkWR.r +++ b/R/IkWR.r @@ -29,12 +29,11 @@ #' @seealso \code{\link{Ik}}, \code{\link{SupportWR}}, \code{\link{nk}} #' #' @examples -#' # With-replacement support: N = 3 units, m = 2 draws -#' N <- 3 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) #' m <- 2 +#' # The sample membership matrix for with-replacement sampling #' IkWR(N, m) -#' # Number of rows = choose(N + m - 1, m) = choose(4, 2) = 6 -#' nrow(IkWR(N, m)) == choose(N + m - 1, m) IkWR <- function(N, m) { Q <- SupportWR(N, m, ID = FALSE) diff --git a/R/OrderWR.r b/R/OrderWR.r index 78728ec..ab120ba 100644 --- a/R/OrderWR.r +++ b/R/OrderWR.r @@ -31,13 +31,17 @@ #' @seealso \code{\link{SupportWR}}, \code{\link{IkWR}} #' #' @examples -#' # All ordered sequences of 2 draws from N = 3 units -#' OrderWR(N = 3, m = 2) -#' # N^m = 9 rows -#' -#' # With population labels -#' U <- c("A", "B", "C") -#' OrderWR(N = 3, m = 2, ID = U) +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) +#' # Five possible ordered samples of size m=1 +#' OrderWR(N, 1) +#' OrderWR(N, 1, ID = U) +#' # 25 possible ordered samples of size m=2 +#' OrderWR(N, 2) +#' OrderWR(N, 2, ID = U) +#' # Note: ordered samples differ from unordered (SupportWR) +#' OrderWR(N, 2) +#' SupportWR(N, 2) OrderWR <- function(N, m, ID = FALSE) { b <- c(1:N) diff --git a/R/PikHol.r b/R/PikHol.r index 0d61bbe..5d3de94 100644 --- a/R/PikHol.r +++ b/R/PikHol.r @@ -36,13 +36,36 @@ #' @seealso \code{\link{PikPPS}}, \code{\link{PikSTPPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' data(Lucy) #' attach(Lucy) -#' # Two surveys with different auxiliary variables -#' sigma <- cbind(Employees, Income) -#' n <- c(100, 150) -#' pik <- PikHol(n, sigma, e = 0.1) -#' sum(pik <= 1) # all valid probabilities +#' N <- dim(Lucy)[1] +#' n <- c(350, 400) +#' sigy1 <- sqrt(Income^(1)) +#' sigy2 <- sqrt(Income^(2)) +#' sigma <- cbind(sigy1, sigy2) +#' Piks <- PikHol(n, sigma, 0.03) +#' n.opt <- round(sum(Piks)) +#' res <- S.piPS(n.opt, Piks) +#' sam <- res[, 1] +#' Pik.s <- res[, 2] +#' estima <- data.frame(Lucy$Income[sam], Lucy$Employees[sam]) +#' E.piPS(estima, Pik.s) +#' ############ +#' ## Example 2 - with custom inclusion probabilities +#' ############ +#' data(Lucy) +#' attach(Lucy) +#' N <- dim(Lucy)[1] +#' n <- c(350, 400) +#' sigy1 <- sqrt(Income^(1)) +#' sigy2 <- sqrt(Income^(2)) +#' sigma <- cbind(sigy1, sigy2) +#' pikas <- cbind(rep(400/N, N), rep(400/N, N)) +#' Piks <- PikHol(n, sigma, 0.03, pikas) +#' round(sum(Piks)) PikHol <- function(n, sigma, e, Pi = NULL) { N <- dim(sigma)[1] diff --git a/R/PikPPS.r b/R/PikPPS.r index cab08ea..0cb0b3f 100644 --- a/R/PikPPS.r +++ b/R/PikPPS.r @@ -28,15 +28,50 @@ #' @seealso \code{\link{S.piPS}}, \code{\link{PikSTPPS}}, \code{\link{PikHol}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' x <- c(30,41,50,170,43,200) +#' n <- 3 +#' # Two elements yields values bigger than one +#' n*x/sum(x) +#' # With this functions, all of the values are between zero and one +#' PikPPS(n,x) +#' # The sum is equal to the sample size +#' sum(PikPPS(n,x)) +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # The auxiliary information +#' x <- c(52, 60, 75, 100, 50) +#' # Gives the inclusion probabilities for the population accordin to a +#' # proportional to size design without replacement of size n=4 +#' pik <- PikPPS(4,x) +#' pik +#' # The selected sample is +#' sum(pik) +#' +#' ############ +#' ## Example 3 +#' ############ +#' # Uses the Lucy data to compute teh vector of inclusion probabilities +#' # accordind to a piPS without replacement design +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' Pik <- PikPPS(n, Employees) -#' # Check: sum equals n -#' sum(Pik) -#' # All values are valid probabilities -#' all(Pik > 0 & Pik <= 1) +#' # The sample size +#' n=400 +#' # The selection probability of each unit is proportional to the variable Income +#' pik <- PikPPS(n,Income) +#' # The inclusion probabilities of the units in the sample +#' pik +#' # The sum of the values in pik is equal to the sample size +#' sum(pik) +#' # According to the design some elements must be selected +#' # They are called forced inclusion units +#' which(pik==1) PikPPS <- function(n, x) { pik <- n * x/sum(x) diff --git a/R/Pikl.r b/R/Pikl.r index 4eec0cd..fbb3a45 100644 --- a/R/Pikl.r +++ b/R/Pikl.r @@ -32,12 +32,13 @@ #' @seealso \code{\link{Pik}}, \code{\link{Deltakl}}, \code{\link{VarHT}} #' #' @examples -#' # Population N = 5, sample size n = 2 -#' N <- 5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) #' n <- 2 #' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -#' pi2 <- Pikl(N, n, p) -#' pi2 +#' sum(p) +#' # Second-order inclusion probabilities +#' Pikl(N, n, p) Pikl <- function(N, n, p) { Sam <- Ik(N, n) diff --git a/R/S.BE.r b/R/S.BE.r index ac416ca..92686ba 100644 --- a/R/S.BE.r +++ b/R/S.BE.r @@ -26,23 +26,33 @@ #' @seealso \code{\link{E.BE}}, \code{\link{S.PO}}, \code{\link{S.SI}} #' #' @examples -#' # Population of size N = 100, inclusion probability 10% -#' N <- 100 -#' prob <- 0.1 -#' sam <- S.BE(N, prob) +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Draws a Bernoulli sample without replacement of expected size n=3 +#' # The inlusion probability is 0.6 for each unit in the population +#' sam <- S.BE(5,0.6) +#' sam +#' # The selected sample is +#' U[sam] #' -#' # Extract selected indices -#' selected <- sam[sam != 0] -#' length(selected) # random, around 10 +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a Bernoulli sample #' -#' # Using Lucy data -#' data('Lucy') -#' N <- nrow(Lucy) -#' prob <- 0.05 -#' sam <- S.BE(N, prob) -#' sam <- sam[sam != 0] -#' y <- data.frame(Income = Lucy$Income[sam]) -#' E.BE(y, prob) +#' data(Lucy) +#' attach(Lucy) +#' N <- dim(Lucy)[1] +#' # The population size is 2396. If the expected sample size is 400 +#' # then, the inclusion probability must be 400/2396=0.1669 +#' sam <- S.BE(N,0.01669) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' data +#' dim(data) S.BE <- function(N, prob) { sam <- matrix(0, N, 1) diff --git a/R/S.PO.r b/R/S.PO.r index 1c7c6d3..6bb7b84 100644 --- a/R/S.PO.r +++ b/R/S.PO.r @@ -27,15 +27,36 @@ #' @seealso \code{\link{E.PO}}, \code{\link{PikPPS}}, \code{\link{S.piPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Draws a Bernoulli sample without replacement of expected size n=3 +#' # "Erik" is drawn in every possible sample becuse its inclusion probability is one +#' Pik <- c(0.5, 0.2, 1, 0.9, 0.5) +#' sam <- S.PO(5,Pik) +#' sam +#' # The selected sample is +#' U[sam] +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a Poisson sample +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' Pik <- PikPPS(n, Employees) -#' sam <- S.PO(N, Pik) -#' sam <- sam[sam != 0] -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.PO(y, Pik[sam]) +#' N <- dim(Lucy)[1] +#' n <- 400 +#' Pik<-n*Income/sum(Income) +#' # None element of Pik bigger than one +#' which(Pik>1) +#' # The selected sample +#' sam <- S.PO(N,Pik) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' data +#' dim(data) S.PO <- function(N, Pik) { sam <- matrix(0, N, 1) diff --git a/R/S.PPS.r b/R/S.PPS.r index 43334ae..3970694 100644 --- a/R/S.PPS.r +++ b/R/S.PPS.r @@ -30,14 +30,35 @@ #' @seealso \code{\link{E.PPS}}, \code{\link{HH}}, \code{\link{S.piPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # The auxiliary information +#' x <- c(52, 60, 75, 100, 50) +#' # Draws a PPS sample with replacement of size m=3 +#' res <- S.PPS(3,x) +#' sam <- res[,1] +#' # The selected sample is +#' U[sam] +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a random sample according to a +#' # PPS with replacement design +#' data(Lucy) #' attach(Lucy) -#' m <- 400 -#' res <- S.PPS(m, Employees) -#' sam <- res[, 1] -#' pk <- res[, 2] -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.PPS(y, pk) +#' # The selection probability of each unit is proportional to the variable Income +#' m <- 400 +#' res<-S.PPS(400,Income) +#' # The selected sample +#' sam <- res[,1] +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' data +#' dim(data) S.PPS <- function(m, x) { N <- length(x) diff --git a/R/S.SI.r b/R/S.SI.r index a2121fb..d0f0e70 100644 --- a/R/S.SI.r +++ b/R/S.SI.r @@ -32,14 +32,30 @@ #' @seealso \code{\link{E.SI}}, \code{\link{S.STSI}}, \code{\link{S.SY}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Fixes the random numbers in order to select a sample +#' e <- c(0.4938, 0.7044, 0.4585, 0.6747, 0.0640) +#' # Draws a simple random sample without replacement of size n=3 +#' sam <- S.SI(5, 3, e) +#' sam +#' # The selected sample is +#' U[sam] +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a random sample according to a SI design +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 +#' N <- dim(Lucy)[1] +#' n <- 400 #' sam <- S.SI(N, n) -#' sam <- sam[sam != 0] -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.SI(N, n, y) +#' # The information about the units in the sample +#' data <- Lucy[sam, ] +#' dim(data) S.SI <- function(N, n, e = runif(N)) { c <- matrix(0, N, 1) diff --git a/R/S.STPPS.r b/R/S.STPPS.r index 64b6883..7f95506 100644 --- a/R/S.STPPS.r +++ b/R/S.STPPS.r @@ -33,15 +33,52 @@ #' @seealso \code{\link{S.PPS}}, \code{\link{S.STpiPS}}, \code{\link{E.STPPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # The auxiliary information +#' x <- c(52, 60, 75, 100, 50) +#' # Vector Strata contains an indicator variable of stratum membership +#' Strata <- c("A", "A", "A", "B", "B") +#' # Then sample size in each stratum +#' mh <- c(2,2) +#' # Draws a stratified PPS sample with replacement of size n=4 +#' res <- S.STPPS(Strata, x, mh) +#' # The selected sample +#' sam <- res[,1] +#' U[sam] +#' # The selection probability of each unit selected to be in the sample +#' pk <- res[,2] +#' pk +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a stratified random sample +#' # according to a PPS design in each stratum +#' +#' data(Lucy) #' attach(Lucy) -#' mh <- c(20, 30, 50) -#' res <- S.STPPS(Level, Employees, mh) -#' head(res) -#' sam <- res$sam -#' pk <- res$pk -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.STPPS(y, pk, mh, Level[sam]) +#' # Level is the stratifying variable +#' summary(Level) +#' # Defines the sample size at each stratum +#' m1<-70 +#' m2<-100 +#' m3<-200 +#' mh<-c(m1,m2,m3) +#' # Draws a stratified sample +#' res<-S.STPPS(Level, Income, mh) +#' # The selected sample +#' sam<-res[,1] +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' data +#' dim(data) +#' # The selection probability of each unit selected in the sample +#' pk <- res[,2] +#' pk S.STPPS <- function(S, x, mh) { S <- as.factor(S) diff --git a/R/S.STSI.r b/R/S.STSI.r index bdd26a2..f1b7ba3 100644 --- a/R/S.STSI.r +++ b/R/S.STSI.r @@ -31,14 +31,29 @@ #' @seealso \code{\link{E.STSI}}, \code{\link{S.SI}}, \code{\link{S.STpiPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' Strata <- c("A", "A", "A", "B", "B") +#' Nh <- c(3, 2) +#' nh <- c(2, 1) +#' sam <- S.STSI(Strata, Nh, nh) +#' sam +#' U[sam] +#' ############ +#' ## Example 2 +#' ############ +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' Nh <- as.numeric(table(Level)) +#' N1 <- summary(Level)[[1]] +#' N2 <- summary(Level)[[2]] +#' N3 <- summary(Level)[[3]] +#' Nh <- c(N1, N2, N3) #' nh <- c(70, 100, 200) #' sam <- S.STSI(Level, Nh, nh) -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.STSI(Level[sam], Nh, nh, y) +#' data <- Lucy[sam, ] +#' dim(data) S.STSI <- function(S, Nh, nh) { S <- as.factor(S) diff --git a/R/S.STpiPS.R b/R/S.STpiPS.R index e6b7c46..f9a7160 100644 --- a/R/S.STpiPS.R +++ b/R/S.STpiPS.R @@ -34,17 +34,32 @@ #' \code{\link{PikSTPPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' x <- c(52, 60, 75, 100, 50) +#' Strata <- c("A", "A", "A", "B", "B") +#' nh <- c(2, 2) +#' res <- S.STpiPS(Strata, x, nh) +#' sam <- res[, 1] +#' U[sam] +#' pik <- res[, 2] +#' pik +#' ############ +#' ## Example 2 +#' ############ +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n1 <- 70; n2 <- 100; n3 <- 200 -#' nh <- c(n1, n2, n3) +#' N1 <- summary(Level)[[1]] +#' N2 <- summary(Level)[[2]] +#' N3 <- summary(Level)[[3]] +#' nh <- c(70, 100, 200) #' res <- S.STpiPS(Level, Employees, nh) -#' head(res) #' sam <- res[, 1] -#' Pik <- res[, 2] -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.STpiPS(y, Pik, Level[sam]) +#' data <- Lucy[sam, ] +#' dim(data) +#' pik <- res[, 2] S.STpiPS <- function(S, x, nh) { S <- as.factor(S) diff --git a/R/S.SY.r b/R/S.SY.r index 571d4df..6130d48 100644 --- a/R/S.SY.r +++ b/R/S.SY.r @@ -28,13 +28,34 @@ #' @seealso \code{\link{E.SY}}, \code{\link{S.SI}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # The population of size N=5 is divided in a=2 groups +#' # Draws a Systematic sample. +#' sam <- S.SY(5,2) +#' sam +#' # The selected sample is +#' U[sam] +#' # There are only two possible samples +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a Systematic sample +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' a <- 10 -#' sam <- S.SY(N, a) -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.SY(N, a, y) +#' +#' N <- dim(Lucy)[1] +#' # The population is divided in 6 groups +#' # The selected sample +#' sam <- S.SY(N,6) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' data +#' dim(data) S.SY <- function(N, a) { r <- sample(a, 1) diff --git a/R/S.WR.r b/R/S.WR.r index 00a317f..335dfa8 100644 --- a/R/S.WR.r +++ b/R/S.WR.r @@ -27,13 +27,32 @@ #' @seealso \code{\link{E.WR}}, \code{\link{S.SI}}, \code{\link{S.PPS}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Draws a simple random sample witho replacement of size m=3 +#' sam <- S.WR(5,3) +#' sam +#' # The selected sample +#' U[sam] +#' +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data to draw a random sample of units accordind to a +#' # simple random sampling with replacement design +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' m <- 400 -#' sam <- S.WR(N, m) -#' y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -#' E.WR(N, m, y) +#' +#' N <- dim(Lucy)[1] +#' m <- 400 +#' sam<-S.WR(N,m) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' data +#' dim(data) S.WR <- function(N, m) { nk <- rep(0, N) diff --git a/R/S.piPS.r b/R/S.piPS.r index d626b3d..b3fc038 100644 --- a/R/S.piPS.r +++ b/R/S.piPS.r @@ -1,36 +1,84 @@ #' @export +#' +#' @title +#' Probability Proportional to Size Without-Replacement Sampling (piPS) +#' @description +#' Draws a without-replacement sample of size \code{n} using a sequential +#' algorithm that produces inclusion probabilities proportional to an +#' auxiliary size variable \code{x}. +#' @return +#' A matrix with \code{n} rows and two columns: +#' \itemize{ +#' \item Column 1: population indices of the selected units. +#' \item Column 2: first-order inclusion probabilities of the selected units. +#' } +#' @author Hugo Andres Gutierrez Rojas +#' @param n Sample size. +#' @param x Vector of length \code{N} with positive auxiliary size values. +#' @param e Optional vector of \code{N} uniform random variates in \code{(0,1)}. +#' If omitted, \code{runif(N)} is used. +#' +#' @references +#' Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +#' \emph{Model Assisted Survey Sampling}. Springer.\cr +#' Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +#' y estimacion de parametros}. Editorial Universidad Santo Tomas. +#' +#' @seealso \code{\link{E.piPS}}, \code{\link{PikPPS}}, \code{\link{S.STPPS}} +#' +#' @examples +#' ############ +#' ## Example 1 +#' ############ +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' x <- c(52, 60, 75, 100, 50) +#' # Draws a piPS sample without replacement of size n=3 +#' res <- S.piPS(3, x) +#' res +#' sam <- res[, 1] +#' U[sam] +#' ############ +#' ## Example 2 +#' ############ +#' # Uses the Lucy data +#' data(Lucy) +#' attach(Lucy) +#' res <- S.piPS(400, Income) +#' sam <- res[, 1] +#' Pik.s <- res[, 2] +#' data <- Lucy[sam, ] +#' dim(data) -S.piPS <- function (n, x, e = runif(length(x))) { - if(length(x) != 1){ - N <- length(x) - x1 <- sort(x, decreasing = TRUE) +S.piPS <- function(n, x, e = runif(length(x))) { + if (length(x) != 1) { + N <- length(x) + x1 <- sort(x, decreasing = TRUE) Pik <- PikPPS(n, x1) - V <- cumsum(Pik) - nk <- matrix(0, N, 1) - d <- matrix(0, N, 1) - I <- matrix(0, N, 1) + V <- cumsum(Pik) + nk <- matrix(0, N, 1) + d <- matrix(0, N, 1) + I <- matrix(0, N, 1) sam <- matrix(0, N, 1) if (e[1] < Pik[1]) { - I[1] <- 1 + I[1] <- 1 sam[1] <- 1 } for (k in 2:N) { nk[k] <- nk[k - 1] + I[k - 1] - d[k] <- Pik[k] * (n - nk[k])/(n - V[k - 1]) + d[k] <- Pik[k] * (n - nk[k])/(n - V[k - 1]) if (e[k] <= d[k]) { - I[k] <- 1 + I[k] <- 1 sam[k] <- cumsum(I[1:(k - 1)])[(k - 1)] + I[k] } } - samp <- rev(order(x))[which(sam != 0)] - Pik1 <- PikPPS(n, x) + samp <- rev(order(x))[which(sam != 0)] + Pik1 <- PikPPS(n, x) Pik.s <- Pik1[samp] return(cbind(samp, Pik.s)) } - - if(length(x) == 1){ + if (length(x) == 1) { Pik.s <- 1 - samp <- 1 + samp <- 1 return(cbind(samp, Pik.s)) } } \ No newline at end of file diff --git a/R/Support.r b/R/Support.r index 9c904e6..9d0b37f 100644 --- a/R/Support.r +++ b/R/Support.r @@ -31,12 +31,16 @@ #' @seealso \code{\link{Ik}}, \code{\link{SupportWR}}, \code{\link{SupportRS}} #' #' @examples -#' # All samples of size 2 from a population of 5 #' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -#' Support(N = 5, n = 2, ID = U) -#' -#' # Integer indices only -#' Support(N = 5, n = 2) +#' N <- length(U) +#' n <- 2 +#' # Ten possible samples of size n=2 +#' Support(N, n) +#' # Labeled support +#' Support(N, n, ID = U) +#' # Support showing values of y +#' y <- c(32, 34, 46, 89, 35) +#' Support(N, n, ID = y) Support <- function(N, n, ID = FALSE) { m <- matrix(0, choose(N, n), n) diff --git a/R/SupportRS.r b/R/SupportRS.r index cdb035a..339658a 100644 --- a/R/SupportRS.r +++ b/R/SupportRS.r @@ -28,9 +28,12 @@ #' @seealso \code{\link{Support}}, \code{\link{IkRS}} #' #' @examples -#' # Complete support for N = 3 -#' SupportRS(3) -#' # 2^3 = 8 rows +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) +#' # Complete support for all sample sizes +#' SupportRS(N) +#' # Labeled support +#' SupportRS(N, ID = U) SupportRS <- function(N, ID = FALSE) { sam <- matrix(NA, ncol = N, nrow = 1) diff --git a/R/SupportWR.r b/R/SupportWR.r index 9601b22..458a1de 100644 --- a/R/SupportWR.r +++ b/R/SupportWR.r @@ -30,9 +30,14 @@ #' @seealso \code{\link{IkWR}}, \code{\link{nk}}, \code{\link{p.WR}} #' #' @examples -#' # All unordered outcomes: N = 3, m = 2 -#' SupportWR(N = 3, m = 2) -#' # choose(3+2-1, 2) = choose(4,2) = 6 rows +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) +#' m <- 2 +#' # With-replacement support +#' SupportWR(N, m) +#' SupportWR(N, m, ID = U) +#' y <- c(32, 34, 46, 89, 35) +#' SupportWR(N, m, ID = y) SupportWR <- function(N, m, ID = FALSE) { S <- 0 diff --git a/R/T.SIC.r b/R/T.SIC.r index ac22686..ce17696 100644 --- a/R/T.SIC.r +++ b/R/T.SIC.r @@ -29,18 +29,34 @@ #' @seealso \code{\link{E.1SI}}, \code{\link{E.2SI}} #' #' @examples -#' library(dplyr) -#' data('BigCity') -#' UI <- levels(as.factor(BigCity$PSU)) -#' NI <- length(UI) -#' nI <- 10 -#' sam <- S.SI(NI, nI) -#' sampleI <- UI[sam[sam != 0]] -#' CityI <- BigCity[BigCity$PSU %in% sampleI, ] -#' y <- data.frame(Income = CityI$Income, -#' Expenditure = CityI$Expenditure) -#' cluster <- CityI$PSU -#' T.SIC(y, cluster) +#' ############ +#' ## Example 1 +#' ############ +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' y1 <- c(32, 34, 46, 89, 35) +#' y2 <- c(1, 1, 1, 0, 0) +#' y3 <- cbind(y1, y2) +#' Cluster <- c("C1", "C2", "C1", "C2", "C1") +#' T.SIC(y1, Cluster) +#' T.SIC(y3, Cluster) +#' ############ +#' ## Example 2 - Cluster sampling with Lucy data +#' ############ +#' data(Lucy) +#' attach(Lucy) +#' UI <- c("A", "B", "C", "D", "E") +#' NI <- length(UI) +#' nI <- 2 +#' samI <- S.SI(NI, nI) +#' dataI <- UI[samI] +#' Lucy1 <- Lucy[which(Zone == dataI[1]), ] +#' Lucy2 <- Lucy[which(Zone == dataI[2]), ] +#' LucyI <- rbind(Lucy1, Lucy2) +#' attach(LucyI) +#' Cluster <- as.factor(as.integer(Zone)) +#' estima <- data.frame(Income, Employees, Taxes) +#' Ty <- T.SIC(estima, Cluster) +#' E.SI(NI, nI, Ty) T.SIC <- function(y, Cluster) { Cluster <- as.factor(Cluster) diff --git a/R/VarHT.r b/R/VarHT.r index 33a0575..213e2ca 100644 --- a/R/VarHT.r +++ b/R/VarHT.r @@ -34,11 +34,15 @@ #' @seealso \code{\link{Deltakl}}, \code{\link{VarSYGHT}}, \code{\link{HT}} #' #' @examples -#' N <- 5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' y1 <- c(32, 34, 46, 89, 35) +#' y2 <- c(1, 1, 1, 0, 0) +#' N <- length(U) #' n <- 2 -#' y <- c(32, 34, 46, 89, 35) #' p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -#' VarHT(y, N, n, p) +#' # Theoretical variance of the HT estimator +#' VarHT(y1, N, n, p) +#' VarHT(y2, N, n, p) VarHT <- function(y, N, n, p) { Ind <- Ik(N, n) diff --git a/R/VarSYGHT.R b/R/VarSYGHT.R index 462db6d..3352ba2 100644 --- a/R/VarSYGHT.R +++ b/R/VarSYGHT.R @@ -110,4 +110,4 @@ VarSYGHT <- function (y, N, n, p) } Resultado <- data.frame(I = Ind, p = p, Est.HT = Est.HT, Est.Var1 = Est.Var1, Est.Var2 = Est.Var2) return(Resultado) -} +} \ No newline at end of file diff --git a/R/Wk.r b/R/Wk.r index 0d57563..b8d0b82 100644 --- a/R/Wk.r +++ b/R/Wk.r @@ -36,18 +36,163 @@ #' @seealso \code{\link{GREG.SI}}, \code{\link{E.Beta}} #' #' @examples -#' data('Lucy') +#' ############ +#' ## Example 1 +#' ############ +#' # Without replacement sampling +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector x is the auxiliary information and y is the variables of interest +#' x<-c(32, 34, 46, 89, 35) +#' y<-c(52, 60, 75, 100, 50) +#' # pik is some vector of inclusion probabilities in the sample +#' # In this case the sample size is equal to the population size +#' pik<-rep(1,5) +#' w1<-Wk(x,tx=236,pik,ck=1,b0=FALSE) +#' sum(x*w1) +#' # Draws a sample size without replacement +#' sam <- sample(5,2) +#' pik <- c (0.8,0.2,0.2,0.5,0.3) +#' # The auxiliary information an variable of interest in the selected smaple +#' x.s<-x[sam] +#' y.s<-y[sam] +#' # The vector of inclusion probabilities in the selected smaple +#' pik.s<-pik[sam] +#' # Calibration weights under some specifics model +#' w2<-Wk(x.s,tx=236,pik.s,ck=1,b0=FALSE) +#' sum(x.s*w2) +#' +#' w3<-Wk(x.s,tx=c(5,236),pik.s,ck=1,b0=TRUE) +#' sum(w3) +#' sum(x.s*w3) +#' +#' w4<-Wk(x.s,tx=c(5,236),pik.s,ck=x.s,b0=TRUE) +#' sum(w4) +#' sum(x.s*w4) +#' +#' w5<-Wk(x.s,tx=236,pik.s,ck=x.s,b0=FALSE) +#' sum(x.s*w5) +#' +#' ###################################################################### +#' ## Example 2: Linear models involving continuous auxiliary information +#' ###################################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) #' attach(Lucy) -#' N <- nrow(Lucy) -#' n <- 400 -#' sam <- S.SI(N, n) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 #' Pik <- rep(n/N, n) -#' x <- as.matrix(Employees[sam]) -#' tx <- sum(Employees) -#' ck <- rep(1, n) -#' wk <- Wk(x, tx, Pik, ck) -#' # Check calibration: weighted sum of x equals tx -#' sum(wk * x) +#' sam <- S.SI(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' +#' ########### common ratio model ################### +#' +#' estima<-data.frame(Income) +#' x <- Employees +#' tx <- sum(Lucy$Employees) +#' w <- Wk(x, tx, Pik, ck=1, b0=FALSE) +#' sum(x*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) +#' +#' ########### Simple regression model without intercept ################### +#' +#' estima<-data.frame(Income, Employees) +#' x <- Taxes +#' tx <- sum(Lucy$Taxes) +#' w<-Wk(x,tx,Pik,ck=x,b0=FALSE) +#' sum(x*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) +#' +#' ########### Multiple regression model without intercept ################### +#' +#' estima<-data.frame(Income) +#' x <- cbind(Employees, Taxes) +#' tx <- c(sum(Lucy$Employees), sum(Lucy$Taxes)) +#' w <- Wk(x,tx,Pik,ck=1,b0=FALSE) +#' sum(x[,1]*w) +#' sum(x[,2]*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) +#' +#' ########### Simple regression model with intercept ################### +#' +#' estima<-data.frame(Income, Employees) +#' x <- Taxes +#' tx <- c(N,sum(Lucy$Taxes)) +#' w <- Wk(x,tx,Pik,ck=1,b0=TRUE) +#' sum(1*w) +#' sum(x*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) +#' +#' ########### Multiple regression model with intercept ################### +#' +#' estima<-data.frame(Income) +#' x <- cbind(Employees, Taxes) +#' tx <- c(N, sum(Lucy$Employees), sum(Lucy$Taxes)) +#' w <- Wk(x,tx,Pik,ck=1,b0=TRUE) +#' sum(1*w) +#' sum(x[,1]*w) +#' sum(x[,2]*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) +#' +#' #################################################################### +#' ## Example 3: Linear models involving discrete auxiliary information +#' #################################################################### +#' +#' # Draws a simple random sample without replacement +#' data(Lucy) +#' attach(Lucy) +#' +#' N <- dim(Lucy)[1] +#' n <- 400 +#' sam <- S.SI(N,n) +#' # The information about the units in the sample is stored in an object called data +#' data <- Lucy[sam,] +#' attach(data) +#' names(data) +#' # Vector of inclusion probabilities for units in the selected sample +#' Pik<-rep(n/N,n) +#' # The auxiliary information is discrete type +#' Doma<-Domains(Level) +#' +#' ########### Poststratified common mean model ################### +#' +#' estima<-data.frame(Income, Employees, Taxes) +#' tx <- colSums(Domains(Lucy$Level)) +#' w <- Wk(Doma,tx,Pik,ck=1,b0=FALSE) +#' sum(Doma[,1]*w) +#' sum(Doma[,2]*w) +#' sum(Doma[,3]*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) +#' +#' ########### Poststratified common ratio model ################### +#' +#' estima<-data.frame(Income, Employees) +#' x<-Doma*Taxes +#' tx <- colSums(Domains(Lucy$Level)) +#' w <- Wk(x,tx,Pik,ck=1,b0=FALSE) +#' sum(x[,1]*w) +#' sum(x[,2]*w) +#' sum(x[,3]*w) +#' tx +#' # The calibration estimation +#' colSums(estima*w) Wk <- function(x, tx, Pik, ck, b0 = FALSE) { if (b0 == TRUE) x <- as.matrix(cbind(1, x)) diff --git a/R/nk.r b/R/nk.r index b8890e6..5196e9c 100644 --- a/R/nk.r +++ b/R/nk.r @@ -28,9 +28,10 @@ #' @seealso \code{\link{IkWR}}, \code{\link{SupportWR}}, \code{\link{p.WR}} #' #' @examples -#' # Frequency matrix: N = 3 units, m = 2 draws -#' N <- 3 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' N <- length(U) #' m <- 2 +#' # Frequency matrix for with-replacement sampling #' nk(N, m) nk <- function(N, m) { diff --git a/R/p.WR.r b/R/p.WR.r index cd4a95e..a229e67 100644 --- a/R/p.WR.r +++ b/R/p.WR.r @@ -30,12 +30,40 @@ #' @seealso \code{\link{nk}}, \code{\link{SupportWR}}, \code{\link{S.PPS}} #' #' @examples -#' # N = 3 units, m = 2 draws, equal probabilities -#' N <- 3 -#' m <- 2 -#' pk <- c(1/3, 1/3, 1/3) -#' p <- p.WR(N, m, pk) -#' sum(p) # must equal 1 +#' ############ +#' ## Example 1 +#' ############ +#' # With replacement simple random sampling +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector pk is the sel?ection probability of the units in the finite population +#' pk <- c(0.2, 0.2, 0.2, 0.2, 0.2) +#' sum(pk) +#' N <- length(pk) +#' m <- 3 +#' # The smapling design +#' p <- p.WR(N, m, pk) +#' p +#' sum(p) +#' +#' ############ +#' ## Example 2 +#' ############ +#' # With replacement PPS random sampling +#' # Vector U contains the label of a population of size N=5 +#' U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +#' # Vector x is the auxiliary information and y is the variables of interest +#' x<-c(32, 34, 46, 89, 35) +#' y<-c(52, 60, 75, 100, 50) +#' # Vector pk is the sel?ection probability of the units in the finite population +#' pk <- x/sum(x) +#' sum(pk) +#' N <- length(pk) +#' m <- 3 +#' # The smapling design +#' p <- p.WR(N, m, pk) +#' p +#' sum(p) p.WR <- function(N, m, pk) { p <- rep(0, N) diff --git a/man/Deltakl.rd b/man/Deltakl.rd index 4eadd6d..3cfe049 100644 --- a/man/Deltakl.rd +++ b/man/Deltakl.rd @@ -34,11 +34,13 @@ second-order (\code{\link{Pikl}}) inclusion probabilities, so it is only feasible for small populations. } \examples{ -N <- 5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) n <- 2 p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -Delta <- Deltakl(N, n, p) -Delta +sum(p) +# Variance-Covariance matrix of the sample membership indicators +Deltakl(N, n, p) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/Domains.rd b/man/Domains.rd index f1c7555..a7493fe 100644 --- a/man/Domains.rd +++ b/man/Domains.rd @@ -28,15 +28,47 @@ matrix can be multiplied element-wise with the variable of interest to restrict estimation to each domain. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# This domain contains only two categories: "yes" and "no" +x <- as.factor(c("yes","yes","yes","no","no","no","no","yes","yes")) +Domains(x) + +############ +## Example 2 +############ +# Uses the Lucy data to draw a random sample of units according +# to a SI design +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -sam <- S.SI(N, n) -# Level has 3 domains: Small, Medium, Big -dom <- Domains(Level[sam]) -head(dom) -colSums(dom) # sample sizes per domain + +N <- dim(Lucy)[1] +n <- 400 +sam <- sample(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# The variable SPAM is a domain of interest +Doma <- Domains(SPAM) +Doma +# HT estimation of the absolute domain size for every category in the domain +# of interest +E.SI(N,n,Doma) + +############ +## Example 3 +############ +# Following with Example 2... +# The variables of interest are: Income, Employees and Taxes +# This function allows to estimate the population total of this variables for every +# category in the domain of interest SPAM +estima <- data.frame(Income, Employees, Taxes) +SPAM.no <- estima*Doma[,1] +SPAM.yes <- estima*Doma[,2] +E.SI(N,n,SPAM.no) +E.SI(N,n,SPAM.yes) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/E.2SI.rd b/man/E.2SI.rd index fee003a..fc84d23 100644 --- a/man/E.2SI.rd +++ b/man/E.2SI.rd @@ -43,30 +43,119 @@ component and the within-PSU component, following the classical two-stage variance decomposition of Sarndal et al. (1992). } \examples{ -library(TeachingSampling) -data('BigCity') -library(dplyr) -Households <- BigCity \%>\% - group_by(HHID) \%>\% - summarise(PSU = unique(PSU), - Persons = n(), - Income = sum(Income), - Expenditure = sum(Expenditure)) - -UI <- levels(as.factor(Households$PSU)) +############ +## Example 1 +############ +# Uses Lucy data to draw a twostage simple random sample +# accordind to a 2SI design. Zone is the clustering variable +data(Lucy) +attach(Lucy) +summary(Zone) +# The population of clusters or Primary Sampling Units +UI<-c("A","B","C","D","E") NI <- length(UI) -nI <- 10 -samI <- S.SI(NI, nI) -sampleI <- UI[samI] -CityI <- Households[Households$PSU \%in\% sampleI, ] - -Ni <- as.numeric(table(CityI$PSU)) -ni <- ceiling(Ni * 0.2) +# The sample size is nI=3 +nI <- 3 +# Selects the sample of PSUs +samI<-S.SI(NI,nI) +dataI<-UI[samI] +dataI +# The sampling frame of Secondary Sampling Unit is saved in Lucy1 ... Lucy3 +Lucy1<-Lucy[which(Zone==dataI[1]),] +Lucy2<-Lucy[which(Zone==dataI[2]),] +Lucy3<-Lucy[which(Zone==dataI[3]),] +# The size of every single PSU +N1<-dim(Lucy1)[1] +N2<-dim(Lucy2)[1] +N3<-dim(Lucy3)[1] +Ni<-c(N1,N2,N3) +# The sample size in every PSI is 135 Secondary Sampling Units +n1<-135 +n2<-135 +n3<-135 +ni<-c(n1,n2,n3) +# Selects a sample of Secondary Sampling Units inside the PSUs +sam1<-S.SI(N1,n1) +sam2<-S.SI(N2,n2) +sam3<-S.SI(N3,n3) +# The information about each Secondary Sampling Unit in the PSUs +# is saved in data1 ... data3 +data1<-Lucy1[sam1,] +data2<-Lucy2[sam2,] +data3<-Lucy3[sam3,] +# The information about each unit in the final selected sample is saved in data +data<-rbind(data1, data2, data3) +attach(data) +# The clustering variable is Zone +Cluster <- as.factor(as.integer(Zone)) +# The variables of interest are: Income, Employees and Taxes +# This information is stored in a data frame called estima +estima <- data.frame(Income, Employees, Taxes) +# Estimation of the Population total +E.2SI(NI,nI,Ni,ni,estima,Cluster) -estima <- data.frame(CityI$Persons, CityI$Income, CityI$Expenditure) -area <- as.factor(CityI$PSU) - -E.2SI(NI, nI, Ni, ni, estima, area) +######################################################## +## Example 2 Total Census to the entire population +######################################################## +# Uses Lucy data to draw a cluster random sample +# accordind to a SI design ... +# Zone is the clustering variable +data(Lucy) +attach(Lucy) +summary(Zone) +# The population of clusters +UI<-c("A","B","C","D","E") +NI <- length(UI) +# The sample size equals to the population size of PSU +nI <- NI +# Selects every single PSU +samI<-S.SI(NI,nI) +dataI<-UI[samI] +dataI +# The sampling frame of Secondary Sampling Unit is saved in Lucy1 ... Lucy5 +Lucy1<-Lucy[which(Zone==dataI[1]),] +Lucy2<-Lucy[which(Zone==dataI[2]),] +Lucy3<-Lucy[which(Zone==dataI[3]),] +Lucy4<-Lucy[which(Zone==dataI[4]),] +Lucy5<-Lucy[which(Zone==dataI[5]),] +# The size of every single PSU +N1<-dim(Lucy1)[1] +N2<-dim(Lucy2)[1] +N3<-dim(Lucy3)[1] +N4<-dim(Lucy4)[1] +N5<-dim(Lucy5)[1] +Ni<-c(N1,N2,N3,N4,N5) +# The sample size of Secondary Sampling Units equals to the size of each PSU +n1<-N1 +n2<-N2 +n3<-N3 +n4<-N4 +n5<-N5 +ni<-c(n1,n2,n3,n4,n5) +# Selects every single Secondary Sampling Unit inside the PSU +sam1<-S.SI(N1,n1) +sam2<-S.SI(N2,n2) +sam3<-S.SI(N3,n3) +sam4<-S.SI(N4,n4) +sam5<-S.SI(N5,n5) +# The information about each unit in the cluster is saved in Lucy1 ... Lucy5 +data1<-Lucy1[sam1,] +data2<-Lucy2[sam2,] +data3<-Lucy3[sam3,] +data4<-Lucy4[sam4,] +data5<-Lucy5[sam5,] +# The information about each Secondary Sampling Unit +# in the sample (census) is saved in data +data<-rbind(data1, data2, data3, data4, data5) +attach(data) +# The clustering variable is Zone +Cluster <- as.factor(as.integer(Zone)) +# The variables of interest are: Income, Employees and Taxes +# This information is stored in a data frame called estima +estima <- data.frame(Income, Employees, Taxes) +# Estimation of the Population total +E.2SI(NI,nI,Ni,ni,estima,Cluster) +# Sampling error is null } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/E.Beta.rd b/man/E.Beta.rd index 4077753..7557eb4 100644 --- a/man/E.Beta.rd +++ b/man/E.Beta.rd @@ -44,14 +44,88 @@ optional variance-stabilising constant. The variance is estimated using the residual-based sandwich approach of Sarndal et al. (1992). } \examples{ -data('Lucy') +###################################################################### +## Example 1: Linear models involving continuous auxiliary information +###################################################################### + +# Draws a simple random sample without replacement +data(Lucy) attach(Lucy) -N <- nrow(Lucy) + +N <- dim(Lucy)[1] n <- 400 sam <- S.SI(N, n) -y <- data.frame(Income = Income[sam]) -x <- data.frame(Employees = Employees[sam]) -E.Beta(N, n, y, x, b0 = TRUE) +# The information about the units in the sample +# is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) + +########### common mean model + +estima<-data.frame(Income, Employees, Taxes) +x <- rep(1,n) +E.Beta(N, n, estima,x,ck=1,b0=FALSE) + + +########### common ratio model + +estima<-data.frame(Income) +x <- data.frame(Employees) +E.Beta(N, n, estima,x,ck=x,b0=FALSE) + +########### Simple regression model without intercept + +estima<-data.frame(Income, Employees) +x <- data.frame(Taxes) +E.Beta(N, n, estima,x,ck=1,b0=FALSE) + +########### Multiple regression model without intercept + +estima<-data.frame(Income) +x <- data.frame(Employees, Taxes) +E.Beta(N, n, estima,x,ck=1,b0=FALSE) + +########### Simple regression model with intercept + +estima<-data.frame(Income, Employees) +x <- data.frame(Taxes) +E.Beta(N, n, estima,x,ck=1,b0=TRUE) + +########### Multiple regression model with intercept + +estima<-data.frame(Income) +x <- data.frame(Employees, Taxes) +E.Beta(N, n, estima,x,ck=1,b0=TRUE) + +############################################################### +## Example 2: Linear models with discrete auxiliary information +############################################################### + +# Draws a simple random sample without replacement +data(Lucy) +attach(Lucy) + +N <- dim(Lucy)[1] +n <- 400 +sam <- S.SI(N,n) +# The information about the sample units is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# The auxiliary information +Doma<-Domains(Level) + +########### Poststratified common mean model + +estima<-data.frame(Income, Employees, Taxes) +E.Beta(N, n, estima,Doma,ck=1,b0=FALSE) + +########### Poststratified common ratio model + +estima<-data.frame(Income, Employees) +x<-Doma*Taxes +E.Beta(N, n, estima,x,ck=1,b0=FALSE) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/E.Quantile.rd b/man/E.Quantile.rd index c6d1209..652b77d 100644 --- a/man/E.Quantile.rd +++ b/man/E.Quantile.rd @@ -33,18 +33,32 @@ cumulative weights are computed, and the quantile is located by interpolation. } \examples{ -data('Lucy') +############ +## Example 1 +############ +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +y <- c(32, 34, 46, 89, 35) +x <- c(52, 60, 75, 100, 50) +z <- cbind(y, x) +Pik <- c(0.58, 0.34, 0.48, 0.33, 0.27) +E.Quantile(y, 0.5) +E.Quantile(x, 0.25) +E.Quantile(z, 0.75) +E.Quantile(z, 0.5, Pik) +############ +## Example 2 +############ +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -sam <- S.SI(N, n) -Pik <- rep(n/N, n) -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) - -# Median -E.Quantile(y, Qn = 0.5, Pik = Pik) -# First quartile -E.Quantile(y, Qn = 0.25, Pik = Pik) +m <- 400 +res <- S.PPS(m, Income) +sam <- res[, 1] +pk.s <- res[, 2] +Pik.s <- 1 - (1 - pk.s)^m +data <- Lucy[sam, ] +attach(data) +estima <- data.frame(Income, Employees, Taxes) +E.Quantile(estima, 0.5, Pik.s) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/E.SI.rd b/man/E.SI.rd index b123ae8..46fd8b2 100644 --- a/man/E.SI.rd +++ b/man/E.SI.rd @@ -35,13 +35,89 @@ estimator reduces to \eqn{\hat{t}_y = N \bar{y}_s}, the expansion estimator. The design effect is always 1 because SI is the reference design. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Uses the Lucy data to draw a random sample of units according to a SI design +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -sam <- S.SI(N, n) -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.SI(N, n, y) + +N <- dim(Lucy)[1] +n <- 400 +sam <- S.SI(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# The variables of interest are: Income, Employees and Taxes +# This information is stored in a data frame called estima +estima <- data.frame(Income, Employees, Taxes) +E.SI(N,n,estima) + +############ +## Example 2 +############ +# Following with Example 1. The variable SPAM is a domain of interest +Doma <- Domains(SPAM) +# This function allows to estimate the size of each domain in SPAM +estima <- data.frame(Doma) +E.SI(N,n,Doma) + +############ +## Example 3 +############ +# Following with Example 1. The variable SPAM is a domain of interest +Doma <- Domains(SPAM) +# This function allows to estimate the parameters of the variables of interest +# for every category in the domain SPAM +estima <- data.frame(Income, Employees, Taxes) +SPAM.no <- cbind(Doma[,1], estima*Doma[,1]) +SPAM.yes <- cbind(Doma[,1], estima*Doma[,2]) +# Before running the following lines, notice that: +# The first column always indicates the population size +# The second column is an estimate of the size of the category in the domain SPAM +# The remaining columns estimates the parameters of interest +# within the corresponding category in the domain SPAM +E.SI(N,n,SPAM.no) +E.SI(N,n,SPAM.yes) + +############ +## Example 4 +############ +# Following with Example 1. The variable SPAM is a domain of interest +# and the variable ISO is a populational subgroup of interest +Doma <- Domains(SPAM) +estima <- Domains(Zone) +# Before running the following lines, notice that: +# The first column indicates wheter the unit +# belongs to the first category of SPAM or not +# The remaining columns indicates wheter the unit +# belogns to the categories of Zone +SPAM.no <- data.frame(SpamNO=Doma[,1], Zones=estima*Doma[,1]) +# Before running the following lines, notice that: +# The first column indicates wheter the unit +# belongs to the second category of SPAM or not +# The remaining columns indicates wheter the unit +# belogns to the categories of Zone +SPAM.yes <- data.frame(SpamYES=Doma[,2], Zones=estima*Doma[,2]) +# Before running the following lines, notice that: +# The first column always indicates the population size +# The second column is an estimate of the size of the +# first category in the domain SPAM +# The remaining columns estimates the size of the categories +# of Zone within the corresponding category of SPAM +# Finnaly, note that the sum of the point estimates of the last +# two columns gives exactly the point estimate in the second column +E.SI(N,n,SPAM.no) +# Before running the following lines, notice that: +# The first column always indicates the population size +# The second column is an estimate of the size of the +# second category in the domain SPAM +# The remaining columns estimates the size of the categories +# of Zone within the corresponding category of SPAM +# Finnaly, note that the sum of the point estimates of the last two +# columns gives exactly the point estimate in the second column +E.SI(N,n,SPAM.yes) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/E.STPPS.Rd b/man/E.STPPS.Rd new file mode 100644 index 0000000..b54f803 --- /dev/null +++ b/man/E.STPPS.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.STPPS.r +\name{E.STPPS} +\alias{E.STPPS} +\title{Estimation of the Population Total under Stratified PPS With-Replacement Sampling} +\usage{ +E.STPPS(y, pk, mh, S) +} +\arguments{ +\item{y}{Vector, matrix or data frame of variables of interest.} + +\item{pk}{Vector of selection probabilities for each draw in the sample.} + +\item{mh}{Integer vector with the number of draws within each stratum.} + +\item{S}{Vector identifying the stratum membership of each unit in the sample.} +} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. +} +} +\description{ +Computes the Hansen-Hurwitz estimator of the population total under a +stratified PPS with-replacement (STPPS) sampling design. +} +\examples{ +# Uses the Lucy data to draw a stratified random sample +# according to a PPS design in each stratum +data(Lucy) +attach(Lucy) +m1 <- 83; m2 <- 100; m3 <- 200 +mh <- c(m1, m2, m3) +res <- S.STPPS(Level, Income, mh) +sam <- res[, 1] +pk <- res[, 2] +data <- Lucy[sam, ] +attach(data) +estima <- data.frame(Income, Employees, Taxes) +E.STPPS(estima, pk, mh, Level) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.STPPS}}, \code{\link{E.PPS}}, \code{\link{E.STpiPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.STSI.Rd b/man/E.STSI.Rd new file mode 100644 index 0000000..395db8b --- /dev/null +++ b/man/E.STSI.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.STSI.r +\name{E.STSI} +\alias{E.STSI} +\title{Estimation of the Population Total under Stratified Simple Random Sampling} +\usage{ +E.STSI(S, Nh, nh, y) +} +\arguments{ +\item{S}{Vector identifying the stratum membership of each unit in the sample.} + +\item{Nh}{Integer vector with the population size of each stratum.} + +\item{nh}{Integer vector with the sample size of each stratum.} + +\item{y}{Vector, matrix or data frame of variables of interest.} +} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. +} +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +stratified simple random sampling without replacement (STSI) design. +} +\examples{ +############ +## Example 1 +############ +data(Lucy) +attach(Lucy) +N1 <- summary(Level)[[1]] +N2 <- summary(Level)[[2]] +N3 <- summary(Level)[[3]] +Nh <- c(N1, N2, N3) +n1 <- N1; n2 <- 100; n3 <- 200 +nh <- c(n1, n2, n3) +sam <- S.STSI(Level, Nh, nh) +data <- Lucy[sam, ] +attach(data) +estima <- data.frame(Income, Employees, Taxes) +E.STSI(Level, Nh, nh, estima) +############ +## Example 2 +############ +# The variable SPAM is a domain of interest +Doma <- Domains(SPAM) +SPAM.no <- estima * Doma[, 1] +SPAM.yes <- estima * Doma[, 2] +E.STSI(Level, Nh, nh, Doma) +E.STSI(Level, Nh, nh, SPAM.no) +E.STSI(Level, Nh, nh, SPAM.yes) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.STSI}}, \code{\link{E.SI}}, \code{\link{E.STpiPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/E.STpiPS.Rd b/man/E.STpiPS.Rd new file mode 100644 index 0000000..0409a83 --- /dev/null +++ b/man/E.STpiPS.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/E.STpiPS.R +\name{E.STpiPS} +\alias{E.STpiPS} +\title{Estimation of the Population Total under Stratified piPS Sampling} +\usage{ +E.STpiPS(y, Pik, S) +} +\arguments{ +\item{y}{Vector, matrix or data frame of variables of interest.} + +\item{Pik}{Vector of first-order inclusion probabilities for each unit +in the sample.} + +\item{S}{Vector identifying the stratum membership of each unit in the sample.} +} +\value{ +A matrix with four rows and one column per variable of interest: +\itemize{ + \item \code{Estimation}: Estimated population total. + \item \code{Standard Error}: Estimated standard error. + \item \code{CVE}: Estimated coefficient of variation (in percentage). + \item \code{DEFF}: Design effect with respect to simple random sampling. +} +} +\description{ +Computes the Horvitz-Thompson estimator of the population total under a +stratified without-replacement probability proportional to size (piPS) +sampling design. +} +\examples{ +# Uses the Lucy data to draw a stratified random sample +# according to a piPS design in each stratum +data(Lucy) +attach(Lucy) +N1 <- summary(Level)[[1]] +N2 <- summary(Level)[[2]] +N3 <- summary(Level)[[3]] +nh <- c(N1, 100, 200) +S <- Level +x <- Employees +res <- S.STpiPS(S, x, nh) +sam <- res[, 1] +pik <- res[, 2] +data <- Lucy[sam, ] +attach(data) +estima <- data.frame(Income, Employees, Taxes) +E.STpiPS(estima, pik, Level) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{S.STpiPS}}, \code{\link{E.piPS}}, \code{\link{E.STSI}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/GREG.SI.rd b/man/GREG.SI.rd index 976303b..32fd044 100644 --- a/man/GREG.SI.rd +++ b/man/GREG.SI.rd @@ -45,16 +45,155 @@ estimated from the sample, \eqn{\mathbf{t}_x} are the known population totals, and variance is estimated from the residuals. } \examples{ -data('Lucy') +###################################################################### +## Example 1: Linear models involving continuous auxiliary information +###################################################################### + +# Draws a simple random sample without replacement +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -sam <- S.SI(N, n) -y <- data.frame(Income = Income[sam]) -x <- data.frame(Employees = Employees[sam]) -tx <- sum(Employees) -b <- E.Beta(N, n, y, x, b0 = FALSE) -GREG.SI(N, n, y, x, tx, b) + +N <- dim(Lucy)[1] +n <- 400 +sam <- S.SI(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) + +########### common mean model + +estima<-data.frame(Income, Employees, Taxes) +x <- rep(1,n) +model <- E.Beta(N, n, estima, x, ck=1,b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- c(N) +GREG.SI(N,n,estima,x,tx, b, b0=FALSE) + +########### common ratio model + +estima<-data.frame(Income) +x <- data.frame(Employees) +model <- E.Beta(N, n, estima, x, ck=x,b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- sum(Lucy$Employees) +GREG.SI(N,n,estima,x,tx, b, b0=FALSE) + +########### Simple regression model without intercept + +estima<-data.frame(Income, Employees) +x <- data.frame(Taxes) +model <- E.Beta(N, n, estima, x, ck=1,b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- sum(Lucy$Taxes) +GREG.SI(N,n,estima,x,tx, b, b0=FALSE) + +########### Multiple regression model without intercept + +estima<-data.frame(Income) +x <- data.frame(Employees, Taxes) +model <- E.Beta(N, n, estima, x, ck=1, b0=FALSE) +b <- as.matrix(model[1,,]) +tx <- c(sum(Lucy$Employees), sum(Lucy$Taxes)) +GREG.SI(N,n,estima,x,tx, b, b0=FALSE) + +########### Simple regression model with intercept + +estima<-data.frame(Income, Employees) +x <- data.frame(Taxes) +model <- E.Beta(N, n, estima, x, ck=1,b0=TRUE) +b <- as.matrix(model[1,,]) +tx <- c(N, sum(Lucy$Taxes)) +GREG.SI(N,n,estima,x,tx, b, b0=TRUE) + +########### Multiple regression model with intercept + +estima<-data.frame(Income) +x <- data.frame(Employees, Taxes) +model <- E.Beta(N, n, estima, x, ck=1,b0=TRUE) +b <- as.matrix(model[1,,]) +tx <- c(N, sum(Lucy$Employees), sum(Lucy$Taxes)) +GREG.SI(N,n,estima,x,tx, b, b0=TRUE) + +#################################################################### +## Example 2: Linear models with discrete auxiliary information +#################################################################### + +# Draws a simple random sample without replacement +data(Lucy) + +N <- dim(Lucy)[1] +n <- 400 +sam <- S.SI(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) + +# The auxiliary information is discrete type +Doma<-Domains(Level) + +########### Poststratified common mean model + +estima<-data.frame(Income, Employees, Taxes) +model <- E.Beta(N, n, estima, Doma, ck=1,b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- colSums(Domains(Lucy$Level)) +GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) + +########### Poststratified common ratio model + +estima<-data.frame(Income, Employees) +x <- Doma*Taxes +model <- E.Beta(N, n, estima, x ,ck=1,b0=FALSE) +b <- as.matrix(model[1,,]) +tx <- colSums(Domains(Lucy$Level)*Lucy$Taxes) +GREG.SI(N,n,estima,x,tx, b, b0=FALSE) + +###################################################################### +## Example 3: Domains estimation trough the postestratified estimator +###################################################################### + +# Draws a simple random sample without replacement +data(Lucy) + +N <- dim(Lucy)[1] +n <- 400 +sam <- S.SI(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) + +# The auxiliary information is discrete type +Doma<-Domains(Level) + +########### Poststratified common mean model for the + # Income total in each poststratum ################### + +estima<-Doma*Income +model <- E.Beta(N, n, estima, Doma, ck=1, b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- colSums(Domains(Lucy$Level)) +GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) + +########### Poststratified common mean model for the + # Employees total in each poststratum ################### + +estima<-Doma*Employees +model <- E.Beta(N, n, estima, Doma, ck=1,b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- colSums(Domains(Lucy$Level)) +GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) + +########### Poststratified common mean model for the + # Taxes total in each poststratum ################### + +estima<-Doma*Taxes +model <- E.Beta(N, n, estima, Doma, ck=1, b0=FALSE) +b <- t(as.matrix(model[1,,])) +tx <- colSums(Domains(Lucy$Level)) +GREG.SI(N,n,estima,Doma,tx, b, b0=FALSE) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/HH.Rd b/man/HH.Rd new file mode 100644 index 0000000..fcd94da --- /dev/null +++ b/man/HH.Rd @@ -0,0 +1,145 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HH.r +\name{HH} +\alias{HH} +\title{Hansen-Hurwitz Estimator of the Population Total} +\usage{ +HH(y, pk) +} +\arguments{ +\item{y}{Vector or matrix of values of the variable(s) of interest for +units in the sample (with possible repetitions).} + +\item{pk}{Vector of selection probabilities for each draw in the sample.} +} +\value{ +A numeric vector or matrix with the estimated total for each variable +of interest. +} +\description{ +Computes the Hansen-Hurwitz (HH) estimator of the population total under +a with-replacement sampling design, given the sample observations and +their selection probabilities. +} +\details{ +The Hansen-Hurwitz estimator is: +\deqn{\hat{t}_{HH} = \frac{1}{m}\sum_{i=1}^m \frac{y_i}{p_i}} +where \eqn{p_i} is the selection probability of the \eqn{i}-th draw +and \eqn{m} is the number of draws. This estimator is design-unbiased +under any with-replacement sampling design. +} +\examples{ +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vectors y1 and y2 give the values of the variables of interest +y1<-c(32, 34, 46, 89, 35) +y2<-c(1,1,1,0,0) +y3<-cbind(y1,y2) +# The population size is N=5 +N <- length(U) +# The sample size is m=2 +m <- 2 +# pk is the probability of selection of every single unit +pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) +# Selection of a random sample with replacement +sam <- sample(5,2, replace=TRUE, prob=pk) +# The selected sample is +U[sam] +# The values of the variables of interest for the units in the sample +y1[sam] +y2[sam] +y3[sam,] +# The Hansen-Hurwitz estimator +HH(y1[sam],pk[sam]) +HH(y2[sam],pk[sam]) +HH(y3[sam,],pk[sam]) + + +############ +## Example 2 +############ +# Uses the Lucy data to draw a simple random sample with replacement +data(Lucy) +attach(Lucy) + +N <- dim(Lucy)[1] +m <- 400 +sam <- sample(N,m,replace=TRUE) +# The vector of selection probabilities of units in the sample +pk <- rep(1/N,m) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# The variables of interest are: Income, Employees and Taxes +# This information is stored in a data frame called estima +estima <- data.frame(Income, Employees, Taxes) +HH(estima, pk) + +################################################################ +## Example 3 HH is unbiased for with replacement sampling designs +################################################################ + +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector y1 and y2 are the values of the variables of interest +y<-c(32, 34, 46, 89, 35) +# The population size is N=5 +N <- length(U) +# The sample size is m=2 +m <- 2 +# pk is the probability of selection of every single unit +pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) +# p is the probability of selection of every possible sample +p <- p.WR(N,m,pk) +p +sum(p) +# The sample membership matrix for random size without replacement sampling designs +Ind <- nk(N,m) +Ind +# The support with the values of the elements +Qy <- SupportWR(N,m, ID=y) +Qy +# The support with the values of the elements +Qp <- SupportWR(N,m, ID=pk) +Qp +# The HT estimates for every single sample in the support +HH1 <- HH(Qy[1,], Qp[1,])[1,] +HH2 <- HH(Qy[2,], Qp[2,])[1,] +HH3 <- HH(Qy[3,], Qp[3,])[1,] +HH4 <- HH(Qy[4,], Qp[4,])[1,] +HH5 <- HH(Qy[5,], Qp[5,])[1,] +HH6 <- HH(Qy[6,], Qp[6,])[1,] +HH7 <- HH(Qy[7,], Qp[7,])[1,] +HH8 <- HH(Qy[8,], Qp[8,])[1,] +HH9 <- HH(Qy[9,], Qp[9,])[1,] +HH10 <- HH(Qy[10,], Qp[10,])[1,] +HH11 <- HH(Qy[11,], Qp[11,])[1,] +HH12 <- HH(Qy[12,], Qp[12,])[1,] +HH13 <- HH(Qy[13,], Qp[13,])[1,] +HH14 <- HH(Qy[14,], Qp[14,])[1,] +HH15 <- HH(Qy[15,], Qp[15,])[1,] +# The HT estimates arranged in a vector +Est <- c(HH1, HH2, HH3, HH4, HH5, HH6, HH7, HH8, HH9, HH10, HH11, HH12, HH13, +HH14, HH15) +Est +# The HT is actually desgn-unbiased +data.frame(Ind, Est, p) +sum(Est*p) +sum(y) +} +\references{ +Hansen, M.H. and Hurwitz, W.N. (1943). On the theory of sampling from +finite populations. \emph{Annals of Mathematical Statistics}, 14, 333-362.\cr +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer. +} +\seealso{ +\code{\link{E.PPS}}, \code{\link{HT}}, \code{\link{S.PPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/HT.rd b/man/HT.rd index f2c9764..c272605 100644 --- a/man/HT.rd +++ b/man/HT.rd @@ -29,16 +29,277 @@ where \eqn{\pi_k} is the first-order inclusion probability of unit \eqn{k}. This estimator is design-unbiased for any fixed-size sampling design. } \examples{ -# Population N = 5, sample size n = 2 -N <- 5 +############ +## Example 1 +############ +# Uses the Lucy data to draw a simple random sample without replacement +data(Lucy) +attach(Lucy) + +N <- dim(Lucy)[1] +n <- 400 +sam <- sample(N,n) +# The vector of inclusion probabilities for each unit in the sample +pik <- rep(n/N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# The variables of interest are: Income, Employees and Taxes +# This information is stored in a data frame called estima +estima <- data.frame(Income, Employees, Taxes) +HT(estima, pik) + +############ +## Example 2 +############ +# Uses the Lucy data to draw a simple random sample with replacement +data(Lucy) + +N <- dim(Lucy)[1] +m <- 400 +sam <- sample(N,m,replace=TRUE) +# The vector of selection probabilities of units in the sample +pk <- rep(1/N,m) +# Computation of the inclusion probabilities +pik <- 1-(1-pk)^m +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# The variables of interest are: Income, Employees and Taxes +# This information is stored in a data frame called estima +estima <- data.frame(Income, Employees, Taxes) +HT(estima, pik) + +############ +## Example 3 +############ +# Without replacement sampling +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector y1 and y2 are the values of the variables of interest +y1<-c(32, 34, 46, 89, 35) +y2<-c(1,1,1,0,0) +y3<-cbind(y1,y2) +# The population size is N=5 +N <- length(U) +# The sample size is n=2 n <- 2 +# The sample membership matrix for fixed size without replacement sampling designs +Ind <- Ik(N,n) +# p is the probability of selection of every possible sample p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -y <- c(32, 34, 46, 89, 35) -Ind <- Ik(N, n) -pik <- as.vector(Pik(p, Ind)) -# Select first sample (units 1 and 2) -sam <- c(1, 2) -HT(y[sam], pik[sam]) +# Computation of the inclusion probabilities +inclusion <- Pik(p, Ind) +# Selection of a random sample +sam <- sample(5,2) +# The selected sample +U[sam] +# The inclusion probabilities for these two units +inclusion[sam] +# The values of the variables of interest for the units in the sample +y1[sam] +y2[sam] +y3[sam,] +# The Horvitz-Thompson estimator +HT(y1[sam],inclusion[sam]) +HT(y2[sam],inclusion[sam]) +HT(y3[sam,],inclusion[sam]) + +############ +## Example 4 +############ +# Following Example 3... With replacement sampling +# The population size is N=5 +N <- length(U) +# The sample size is m=2 +m <- 2 +# pk is the probability of selection of every single unit +pk <- c(0.9, 0.025, 0.025, 0.025, 0.025) +# Computation of the inclusion probabilities +pik <- 1-(1-pk)^m +# Selection of a random sample with replacement +sam <- sample(5,2, replace=TRUE, prob=pk) +# The selected sample +U[sam] +# The inclusion probabilities for these two units +inclusion[sam] +# The values of the variables of interest for the units in the sample +y1[sam] +y2[sam] +y3[sam,] +# The Horvitz-Thompson estimator +HT(y1[sam],inclusion[sam]) +HT(y2[sam],inclusion[sam]) +HT(y3[sam,],inclusion[sam]) + +#################################################################### +## Example 5 HT is unbiased for without replacement sampling designs +## Fixed sample size +#################################################################### + +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector y1 and y2 are the values of the variables of interest +y<-c(32, 34, 46, 89, 35) +# The population size is N=5 +N <- length(U) +# The sample size is n=2 +n <- 2 +# The sample membership matrix for fixed size without replacement sampling designs +Ind <- Ik(N,n) +Ind +# p is the probability of selection of every possible sample +p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) +sum(p) +# Computation of the inclusion probabilities +inclusion <- Pik(p, Ind) +inclusion +sum(inclusion) +# The support with the values of the elements +Qy <-Support(N,n,ID=y) +Qy +# The HT estimates for every single sample in the support +HT1<- HT(y[Ind[1,]==1], inclusion[Ind[1,]==1]) +HT2<- HT(y[Ind[2,]==1], inclusion[Ind[2,]==1]) +HT3<- HT(y[Ind[3,]==1], inclusion[Ind[3,]==1]) +HT4<- HT(y[Ind[4,]==1], inclusion[Ind[4,]==1]) +HT5<- HT(y[Ind[5,]==1], inclusion[Ind[5,]==1]) +HT6<- HT(y[Ind[6,]==1], inclusion[Ind[6,]==1]) +HT7<- HT(y[Ind[7,]==1], inclusion[Ind[7,]==1]) +HT8<- HT(y[Ind[8,]==1], inclusion[Ind[8,]==1]) +HT9<- HT(y[Ind[9,]==1], inclusion[Ind[9,]==1]) +HT10<- HT(y[Ind[10,]==1], inclusion[Ind[10,]==1]) +# The HT estimates arranged in a vector +Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10) +Est +# The HT is actually desgn-unbiased +data.frame(Ind, Est, p) +sum(Est*p) +sum(y) + +#################################################################### +## Example 6 HT is unbiased for without replacement sampling designs +## Random sample size +#################################################################### + +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector y1 and y2 are the values of the variables of interest +y<-c(32, 34, 46, 89, 35) +# The population size is N=5 +N <- length(U) +# The sample membership matrix for random size without replacement sampling designs +Ind <- IkRS(N) +Ind +# p is the probability of selection of every possible sample +p <- c(0.59049, 0.06561, 0.06561, 0.06561, 0.06561, 0.06561, 0.00729, 0.00729, + 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00729, 0.00081, + 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, 0.00081, + 0.00009, 0.00009, 0.00009, 0.00009, 0.00009, 0.00001) +sum(p) +# Computation of the inclusion probabilities +inclusion <- Pik(p, Ind) +inclusion +sum(inclusion) +# The support with the values of the elements +Qy <-SupportRS(N, ID=y) +Qy +# The HT estimates for every single sample in the support +HT1<- HT(y[Ind[1,]==1], inclusion[Ind[1,]==1]) +HT2<- HT(y[Ind[2,]==1], inclusion[Ind[2,]==1]) +HT3<- HT(y[Ind[3,]==1], inclusion[Ind[3,]==1]) +HT4<- HT(y[Ind[4,]==1], inclusion[Ind[4,]==1]) +HT5<- HT(y[Ind[5,]==1], inclusion[Ind[5,]==1]) +HT6<- HT(y[Ind[6,]==1], inclusion[Ind[6,]==1]) +HT7<- HT(y[Ind[7,]==1], inclusion[Ind[7,]==1]) +HT8<- HT(y[Ind[8,]==1], inclusion[Ind[8,]==1]) +HT9<- HT(y[Ind[9,]==1], inclusion[Ind[9,]==1]) +HT10<- HT(y[Ind[10,]==1], inclusion[Ind[10,]==1]) +HT11<- HT(y[Ind[11,]==1], inclusion[Ind[11,]==1]) +HT12<- HT(y[Ind[12,]==1], inclusion[Ind[12,]==1]) +HT13<- HT(y[Ind[13,]==1], inclusion[Ind[13,]==1]) +HT14<- HT(y[Ind[14,]==1], inclusion[Ind[14,]==1]) +HT15<- HT(y[Ind[15,]==1], inclusion[Ind[15,]==1]) +HT16<- HT(y[Ind[16,]==1], inclusion[Ind[16,]==1]) +HT17<- HT(y[Ind[17,]==1], inclusion[Ind[17,]==1]) +HT18<- HT(y[Ind[18,]==1], inclusion[Ind[18,]==1]) +HT19<- HT(y[Ind[19,]==1], inclusion[Ind[19,]==1]) +HT20<- HT(y[Ind[20,]==1], inclusion[Ind[20,]==1]) +HT21<- HT(y[Ind[21,]==1], inclusion[Ind[21,]==1]) +HT22<- HT(y[Ind[22,]==1], inclusion[Ind[22,]==1]) +HT23<- HT(y[Ind[23,]==1], inclusion[Ind[23,]==1]) +HT24<- HT(y[Ind[24,]==1], inclusion[Ind[24,]==1]) +HT25<- HT(y[Ind[25,]==1], inclusion[Ind[25,]==1]) +HT26<- HT(y[Ind[26,]==1], inclusion[Ind[26,]==1]) +HT27<- HT(y[Ind[27,]==1], inclusion[Ind[27,]==1]) +HT28<- HT(y[Ind[28,]==1], inclusion[Ind[28,]==1]) +HT29<- HT(y[Ind[29,]==1], inclusion[Ind[29,]==1]) +HT30<- HT(y[Ind[30,]==1], inclusion[Ind[30,]==1]) +HT31<- HT(y[Ind[31,]==1], inclusion[Ind[31,]==1]) +HT32<- HT(y[Ind[32,]==1], inclusion[Ind[32,]==1]) +# The HT estimates arranged in a vector +Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10, HT11, HT12, HT13, + HT14, HT15, HT16, HT17, HT18, HT19, HT20, HT21, HT22, HT23, HT24, HT25, HT26, + HT27, HT28, HT29, HT30, HT31, HT32) +Est +# The HT is actually desgn-unbiased +data.frame(Ind, Est, p) +sum(Est*p) +sum(y) + +################################################################ +## Example 7 HT is unbiased for with replacement sampling designs +################################################################ + +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector y1 and y2 are the values of the variables of interest +y<-c(32, 34, 46, 89, 35) +# The population size is N=5 +N <- length(U) +# The sample size is m=2 +m <- 2 +# pk is the probability of selection of every single unit +pk <- c(0.35, 0.225, 0.175, 0.125, 0.125) +# p is the probability of selection of every possible sample +p <- p.WR(N,m,pk) +p +sum(p) +# The sample membership matrix for random size without replacement sampling designs +Ind <- IkWR(N,m) +Ind +# The support with the values of the elements +Qy <- SupportWR(N,m, ID=y) +Qy +# Computation of the inclusion probabilities +pik <- 1-(1-pk)^m +pik +# The HT estimates for every single sample in the support +HT1 <- HT(y[Ind[1,]==1], pik[Ind[1,]==1]) +HT2 <- HT(y[Ind[2,]==1], pik[Ind[2,]==1]) +HT3 <- HT(y[Ind[3,]==1], pik[Ind[3,]==1]) +HT4 <- HT(y[Ind[4,]==1], pik[Ind[4,]==1]) +HT5 <- HT(y[Ind[5,]==1], pik[Ind[5,]==1]) +HT6 <- HT(y[Ind[6,]==1], pik[Ind[6,]==1]) +HT7 <- HT(y[Ind[7,]==1], pik[Ind[7,]==1]) +HT8 <- HT(y[Ind[8,]==1], pik[Ind[8,]==1]) +HT9 <- HT(y[Ind[9,]==1], pik[Ind[9,]==1]) +HT10 <- HT(y[Ind[10,]==1], pik[Ind[10,]==1]) +HT11 <- HT(y[Ind[11,]==1], pik[Ind[11,]==1]) +HT12 <- HT(y[Ind[12,]==1], pik[Ind[12,]==1]) +HT13 <- HT(y[Ind[13,]==1], pik[Ind[13,]==1]) +HT14 <- HT(y[Ind[14,]==1], pik[Ind[14,]==1]) +HT15 <- HT(y[Ind[15,]==1], pik[Ind[15,]==1]) +# The HT estimates arranged in a vector +Est <- c(HT1, HT2, HT3, HT4, HT5, HT6, HT7, HT8, HT9, HT10, HT11, HT12, HT13, + HT14, HT15) +Est +# The HT is actually desgn-unbiased +data.frame(Ind, Est, p) +sum(Est*p) +sum(y) } \references{ Horvitz, D.G. and Thompson, D.J. (1952). A generalization of sampling diff --git a/man/IPFP.rd b/man/IPFP.rd index 6e213f5..94ec3b5 100644 --- a/man/IPFP.rd +++ b/man/IPFP.rd @@ -36,11 +36,35 @@ Convergence is assessed by the sum of absolute differences between known and estimated marginals. } \examples{ -# A 2x2 table to be raked to known marginals -Table <- matrix(c(10, 20, 30, 40), nrow = 2) -Row.knw <- c(40, 60) -Col.knw <- c(35, 65) -IPFP(Table, Col.knw, Row.knw) +############ +## Example 1 +############ +Table <- matrix(c(80, 90, 10, 170, 80, 80, 150, 210, 130), 3, 3) +rownames(Table) <- c("a1", "a2", "a3") +colnames(Table) <- c("b1", "b2", "b3") +Col.knw <- c(150, 300, 550) +Row.knw <- c(430, 360, 210) +IPFP(Table, Col.knw, Row.knw, tol = 0.0001) +############ +## Example 2 +############ +data(Lucy) +attach(Lucy) +N <- dim(Lucy)[1] +n <- 400 +sam <- sample(N, n) +data <- Lucy[sam, ] +attach(data) +Doma1 <- Domains(Level) +Doma2 <- Domains(SPAM) +SPAM.no <- Doma2[, 1] * Doma1 +SPAM.yes <- Doma2[, 2] * Doma1 +est1 <- E.SI(N, n, SPAM.no)[, 2:4] +est2 <- E.SI(N, n, SPAM.yes)[, 2:4] +Table <- cbind(est1[1, ], est2[1, ]) +Col.knw <- colSums(Domains(Lucy$SPAM)) +Row.knw <- colSums(Domains(Lucy$Level)) +IPFP(Table, Col.knw, Row.knw, tol = 0.0001) } \references{ Deming, W.E. and Stephan, F.F. (1940). On a least squares adjustment of diff --git a/man/Ik.rd b/man/Ik.rd index a3a0945..61ce770 100644 --- a/man/Ik.rd +++ b/man/Ik.rd @@ -28,12 +28,12 @@ this function will be very slow. It is intended primarily for theoretical illustrations and teaching purposes. } \examples{ -# All possible samples of size n = 2 from N = 4 units -N <- 4 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) n <- 2 +# The sample membership matrix Ik(N, n) -# Number of rows equals choose(N, n) = 6 -nrow(Ik(N, n)) == choose(N, n) +# The first unit, Yves, belongs to the first four possible samples } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/IkRS.rd b/man/IkRS.rd index e8777ac..59cc45a 100644 --- a/man/IkRS.rd +++ b/man/IkRS.rd @@ -26,10 +26,11 @@ populations only (\code{N <= 10}) due to the exponential growth of the support size. } \examples{ -# Full indicator matrix for N = 3 -IkRS(3) -# Number of rows: 1 (empty) + 3 + 3 + 1 = 8 = 2^3 -nrow(IkRS(3)) +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) +# The sample membership matrix for all sample sizes +IkRS(N) +# The first sample is a null one and the last sample is a census } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/IkWR.rd b/man/IkWR.rd index 4a3fd5e..9e8a5e7 100644 --- a/man/IkWR.rd +++ b/man/IkWR.rd @@ -29,12 +29,11 @@ This function is intended for small populations and few draws only, as the support grows rapidly with \code{N} and \code{m}. } \examples{ -# With-replacement support: N = 3 units, m = 2 draws -N <- 3 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) m <- 2 +# The sample membership matrix for with-replacement sampling IkWR(N, m) -# Number of rows = choose(N + m - 1, m) = choose(4, 2) = 6 -nrow(IkWR(N, m)) == choose(N + m - 1, m) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/OrderWR.rd b/man/OrderWR.rd index bf37d1e..0498237 100644 --- a/man/OrderWR.rd +++ b/man/OrderWR.rd @@ -32,13 +32,17 @@ from \code{N} units is \eqn{N^m}. This grows rapidly and the function should only be used for small \code{N} and \code{m}. } \examples{ -# All ordered sequences of 2 draws from N = 3 units -OrderWR(N = 3, m = 2) -# N^m = 9 rows - -# With population labels -U <- c("A", "B", "C") -OrderWR(N = 3, m = 2, ID = U) +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) +# Five possible ordered samples of size m=1 +OrderWR(N, 1) +OrderWR(N, 1, ID = U) +# 25 possible ordered samples of size m=2 +OrderWR(N, 2) +OrderWR(N, 2, ID = U) +# Note: ordered samples differ from unordered (SupportWR) +OrderWR(N, 2) +SupportWR(N, 2) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/PikHol.rd b/man/PikHol.rd index 23fad66..92db8b3 100644 --- a/man/PikHol.rd +++ b/man/PikHol.rd @@ -38,13 +38,36 @@ root of this composite. The resulting sample size \code{n.st} is chosen to minimise total variance subject to a relative precision target \code{e}. } \examples{ -data('Lucy') +############ +## Example 1 +############ +data(Lucy) attach(Lucy) -# Two surveys with different auxiliary variables -sigma <- cbind(Employees, Income) -n <- c(100, 150) -pik <- PikHol(n, sigma, e = 0.1) -sum(pik <= 1) # all valid probabilities +N <- dim(Lucy)[1] +n <- c(350, 400) +sigy1 <- sqrt(Income^(1)) +sigy2 <- sqrt(Income^(2)) +sigma <- cbind(sigy1, sigy2) +Piks <- PikHol(n, sigma, 0.03) +n.opt <- round(sum(Piks)) +res <- S.piPS(n.opt, Piks) +sam <- res[, 1] +Pik.s <- res[, 2] +estima <- data.frame(Lucy$Income[sam], Lucy$Employees[sam]) +E.piPS(estima, Pik.s) +############ +## Example 2 - with custom inclusion probabilities +############ +data(Lucy) +attach(Lucy) +N <- dim(Lucy)[1] +n <- c(350, 400) +sigy1 <- sqrt(Income^(1)) +sigy2 <- sqrt(Income^(2)) +sigma <- cbind(sigy1, sigy2) +pikas <- cbind(rep(400/N, N), rep(400/N, N)) +Piks <- PikHol(n, sigma, 0.03, pikas) +round(sum(Piks)) } \references{ Holmberg, A. (2002). A multiparameter perspective on the choice of sampling diff --git a/man/PikPPS.rd b/man/PikPPS.rd index aac894f..3d16b0e 100644 --- a/man/PikPPS.rd +++ b/man/PikPPS.rd @@ -28,15 +28,50 @@ redistributes the remaining sample size among the other units until all probabilities are valid. The result satisfies \eqn{\sum \pi_k = n}. } \examples{ -data('Lucy') +############ +## Example 1 +############ +x <- c(30,41,50,170,43,200) +n <- 3 +# Two elements yields values bigger than one +n*x/sum(x) +# With this functions, all of the values are between zero and one +PikPPS(n,x) +# The sum is equal to the sample size +sum(PikPPS(n,x)) + +############ +## Example 2 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# The auxiliary information +x <- c(52, 60, 75, 100, 50) +# Gives the inclusion probabilities for the population accordin to a +# proportional to size design without replacement of size n=4 +pik <- PikPPS(4,x) +pik +# The selected sample is +sum(pik) + +############ +## Example 3 +############ +# Uses the Lucy data to compute teh vector of inclusion probabilities +# accordind to a piPS without replacement design +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -Pik <- PikPPS(n, Employees) -# Check: sum equals n -sum(Pik) -# All values are valid probabilities -all(Pik > 0 & Pik <= 1) +# The sample size +n=400 +# The selection probability of each unit is proportional to the variable Income +pik <- PikPPS(n,Income) +# The inclusion probabilities of the units in the sample +pik +# The sum of the values in pik is equal to the sample size +sum(pik) +# According to the design some elements must be selected +# They are called forced inclusion units +which(pik==1) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/Pikl.rd b/man/Pikl.rd index 7bb1e49..1e7fc49 100644 --- a/man/Pikl.rd +++ b/man/Pikl.rd @@ -33,12 +33,13 @@ estimator. This function enumerates the full sampling support via (\code{N <= 15}). } \examples{ -# Population N = 5, sample size n = 2 -N <- 5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) n <- 2 p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -pi2 <- Pikl(N, n, p) -pi2 +sum(p) +# Second-order inclusion probabilities +Pikl(N, n, p) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.BE.rd b/man/S.BE.rd index 482b0ad..b3ac584 100644 --- a/man/S.BE.rd +++ b/man/S.BE.rd @@ -26,23 +26,33 @@ Binomial(\code{N}, \code{prob}) distribution. To extract the selected indices, use \code{sam[sam != 0]}. } \examples{ -# Population of size N = 100, inclusion probability 10\% -N <- 100 -prob <- 0.1 -sam <- S.BE(N, prob) +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Draws a Bernoulli sample without replacement of expected size n=3 +# The inlusion probability is 0.6 for each unit in the population +sam <- S.BE(5,0.6) +sam +# The selected sample is +U[sam] -# Extract selected indices -selected <- sam[sam != 0] -length(selected) # random, around 10 +############ +## Example 2 +############ +# Uses the Lucy data to draw a Bernoulli sample -# Using Lucy data -data('Lucy') -N <- nrow(Lucy) -prob <- 0.05 -sam <- S.BE(N, prob) -sam <- sam[sam != 0] -y <- data.frame(Income = Lucy$Income[sam]) -E.BE(y, prob) +data(Lucy) +attach(Lucy) +N <- dim(Lucy)[1] +# The population size is 2396. If the expected sample size is 400 +# then, the inclusion probability must be 400/2396=0.1669 +sam <- S.BE(N,0.01669) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +data +dim(data) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.PO.rd b/man/S.PO.rd index dbf2f3c..6fbe0e6 100644 --- a/man/S.PO.rd +++ b/man/S.PO.rd @@ -27,15 +27,36 @@ unequal inclusion probabilities. The sample size is random. To extract the selected indices, use \code{sam[sam != 0]}. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Draws a Bernoulli sample without replacement of expected size n=3 +# "Erik" is drawn in every possible sample becuse its inclusion probability is one +Pik <- c(0.5, 0.2, 1, 0.9, 0.5) +sam <- S.PO(5,Pik) +sam +# The selected sample is +U[sam] + +############ +## Example 2 +############ +# Uses the Lucy data to draw a Poisson sample +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -Pik <- PikPPS(n, Employees) -sam <- S.PO(N, Pik) -sam <- sam[sam != 0] -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.PO(y, Pik[sam]) +N <- dim(Lucy)[1] +n <- 400 +Pik<-n*Income/sum(Income) +# None element of Pik bigger than one +which(Pik>1) +# The selected sample +sam <- S.PO(N,Pik) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +data +dim(data) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.PPS.rd b/man/S.PPS.rd index db3d8bd..e451992 100644 --- a/man/S.PPS.rd +++ b/man/S.PPS.rd @@ -30,14 +30,35 @@ unit may appear more than once. Use \code{\link{E.PPS}} or \code{\link{HH}} to estimate population totals from this sample. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# The auxiliary information +x <- c(52, 60, 75, 100, 50) +# Draws a PPS sample with replacement of size m=3 +res <- S.PPS(3,x) +sam <- res[,1] +# The selected sample is +U[sam] + +############ +## Example 2 +############ +# Uses the Lucy data to draw a random sample according to a +# PPS with replacement design +data(Lucy) attach(Lucy) -m <- 400 -res <- S.PPS(m, Employees) -sam <- res[, 1] -pk <- res[, 2] -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.PPS(y, pk) +# The selection probability of each unit is proportional to the variable Income +m <- 400 +res<-S.PPS(400,Income) +# The selected sample +sam <- res[,1] +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +data +dim(data) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.SI.rd b/man/S.SI.rd index 3c31dd8..529e83c 100644 --- a/man/S.SI.rd +++ b/man/S.SI.rd @@ -31,14 +31,30 @@ ensuring exactly \code{n} units are selected. To extract the selected indices, filter out the zeros: \code{sam[sam != 0]}. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Fixes the random numbers in order to select a sample +e <- c(0.4938, 0.7044, 0.4585, 0.6747, 0.0640) +# Draws a simple random sample without replacement of size n=3 +sam <- S.SI(5, 3, e) +sam +# The selected sample is +U[sam] +############ +## Example 2 +############ +# Uses the Lucy data to draw a random sample according to a SI design +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 +N <- dim(Lucy)[1] +n <- 400 sam <- S.SI(N, n) -sam <- sam[sam != 0] -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.SI(N, n, y) +# The information about the units in the sample +data <- Lucy[sam, ] +dim(data) } \references{ Fan, C.T., Muller, M.E. and Rezucha, I. (1962). Development of sampling diff --git a/man/S.STPPS.rd b/man/S.STPPS.rd index 2fa5a04..2ab154f 100644 --- a/man/S.STPPS.rd +++ b/man/S.STPPS.rd @@ -34,15 +34,52 @@ appear more than once within a stratum. Use \code{\link{E.STPPS}} to estimate population totals from this sample. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# The auxiliary information +x <- c(52, 60, 75, 100, 50) +# Vector Strata contains an indicator variable of stratum membership +Strata <- c("A", "A", "A", "B", "B") +# Then sample size in each stratum +mh <- c(2,2) +# Draws a stratified PPS sample with replacement of size n=4 +res <- S.STPPS(Strata, x, mh) +# The selected sample +sam <- res[,1] +U[sam] +# The selection probability of each unit selected to be in the sample +pk <- res[,2] +pk + +############ +## Example 2 +############ +# Uses the Lucy data to draw a stratified random sample +# according to a PPS design in each stratum + +data(Lucy) attach(Lucy) -mh <- c(20, 30, 50) -res <- S.STPPS(Level, Employees, mh) -head(res) -sam <- res$sam -pk <- res$pk -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.STPPS(y, pk, mh, Level[sam]) +# Level is the stratifying variable +summary(Level) +# Defines the sample size at each stratum +m1<-70 +m2<-100 +m3<-200 +mh<-c(m1,m2,m3) +# Draws a stratified sample +res<-S.STPPS(Level, Income, mh) +# The selected sample +sam<-res[,1] +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +data +dim(data) +# The selection probability of each unit selected in the sample +pk <- res[,2] +pk } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.STSI.rd b/man/S.STSI.rd index 89db823..53bee36 100644 --- a/man/S.STSI.rd +++ b/man/S.STSI.rd @@ -32,14 +32,29 @@ order. Use \code{\link{E.STSI}} to estimate population totals from this sample. } \examples{ -data('Lucy') +############ +## Example 1 +############ +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +Strata <- c("A", "A", "A", "B", "B") +Nh <- c(3, 2) +nh <- c(2, 1) +sam <- S.STSI(Strata, Nh, nh) +sam +U[sam] +############ +## Example 2 +############ +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -Nh <- as.numeric(table(Level)) +N1 <- summary(Level)[[1]] +N2 <- summary(Level)[[2]] +N3 <- summary(Level)[[3]] +Nh <- c(N1, N2, N3) nh <- c(70, 100, 200) sam <- S.STSI(Level, Nh, nh) -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.STSI(Level[sam], Nh, nh, y) +data <- Lucy[sam, ] +dim(data) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.STpiPS.Rd b/man/S.STpiPS.Rd index a8d8c86..dd3d41c 100644 --- a/man/S.STpiPS.Rd +++ b/man/S.STpiPS.Rd @@ -34,17 +34,32 @@ draw \code{nh[h]} units with probabilities proportional to \code{x}. The global population indices are preserved in the output. } \examples{ -data('Lucy') +############ +## Example 1 +############ +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +x <- c(52, 60, 75, 100, 50) +Strata <- c("A", "A", "A", "B", "B") +nh <- c(2, 2) +res <- S.STpiPS(Strata, x, nh) +sam <- res[, 1] +U[sam] +pik <- res[, 2] +pik +############ +## Example 2 +############ +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n1 <- 70; n2 <- 100; n3 <- 200 -nh <- c(n1, n2, n3) +N1 <- summary(Level)[[1]] +N2 <- summary(Level)[[2]] +N3 <- summary(Level)[[3]] +nh <- c(70, 100, 200) res <- S.STpiPS(Level, Employees, nh) -head(res) sam <- res[, 1] -Pik <- res[, 2] -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.STpiPS(y, Pik, Level[sam]) +data <- Lucy[sam, ] +dim(data) +pik <- res[, 2] } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.SY.rd b/man/S.SY.rd index 4955d59..7ab7c58 100644 --- a/man/S.SY.rd +++ b/man/S.SY.rd @@ -28,13 +28,34 @@ multiple of \code{a}, the sample size varies by one unit depending on the random start. Use \code{\link{E.SY}} to estimate population totals. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# The population of size N=5 is divided in a=2 groups +# Draws a Systematic sample. +sam <- S.SY(5,2) +sam +# The selected sample is +U[sam] +# There are only two possible samples + +############ +## Example 2 +############ +# Uses the Lucy data to draw a Systematic sample +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -a <- 10 -sam <- S.SY(N, a) -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.SY(N, a, y) + +N <- dim(Lucy)[1] +# The population is divided in 6 groups +# The selected sample +sam <- S.SY(N,6) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +data +dim(data) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.WR.rd b/man/S.WR.rd index 05f4203..4d12723 100644 --- a/man/S.WR.rd +++ b/man/S.WR.rd @@ -27,13 +27,32 @@ sequential binomial draw approach. Use \code{\link{E.WR}} to estimate population totals. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Draws a simple random sample witho replacement of size m=3 +sam <- S.WR(5,3) +sam +# The selected sample +U[sam] + +############ +## Example 2 +############ +# Uses the Lucy data to draw a random sample of units accordind to a +# simple random sampling with replacement design +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -m <- 400 -sam <- S.WR(N, m) -y <- data.frame(Income = Income[sam], Expenditure = Expenditure[sam]) -E.WR(N, m, y) + +N <- dim(Lucy)[1] +m <- 400 +sam<-S.WR(N,m) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +data +dim(data) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/S.piPS.Rd b/man/S.piPS.Rd new file mode 100644 index 0000000..7bef94b --- /dev/null +++ b/man/S.piPS.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S.piPS.r +\name{S.piPS} +\alias{S.piPS} +\title{Probability Proportional to Size Without-Replacement Sampling (piPS)} +\usage{ +S.piPS(n, x, e = runif(length(x))) +} +\arguments{ +\item{n}{Sample size.} + +\item{x}{Vector of length \code{N} with positive auxiliary size values.} + +\item{e}{Optional vector of \code{N} uniform random variates in \code{(0,1)}. +If omitted, \code{runif(N)} is used.} +} +\value{ +A matrix with \code{n} rows and two columns: +\itemize{ + \item Column 1: population indices of the selected units. + \item Column 2: first-order inclusion probabilities of the selected units. +} +} +\description{ +Draws a without-replacement sample of size \code{n} using a sequential +algorithm that produces inclusion probabilities proportional to an +auxiliary size variable \code{x}. +} +\examples{ +############ +## Example 1 +############ +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +x <- c(52, 60, 75, 100, 50) +# Draws a piPS sample without replacement of size n=3 +res <- S.piPS(3, x) +res +sam <- res[, 1] +U[sam] +############ +## Example 2 +############ +# Uses the Lucy data +data(Lucy) +attach(Lucy) +res <- S.piPS(400, Income) +sam <- res[, 1] +Pik.s <- res[, 2] +data <- Lucy[sam, ] +dim(data) +} +\references{ +Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), +\emph{Model Assisted Survey Sampling}. Springer.\cr +Gutierrez, H. A. (2009), \emph{Estrategias de muestreo: Diseno de encuestas +y estimacion de parametros}. Editorial Universidad Santo Tomas. +} +\seealso{ +\code{\link{E.piPS}}, \code{\link{PikPPS}}, \code{\link{S.STPPS}} +} +\author{ +Hugo Andres Gutierrez Rojas +} diff --git a/man/Support.rd b/man/Support.rd index 23a06a7..67497cb 100644 --- a/man/Support.rd +++ b/man/Support.rd @@ -31,12 +31,16 @@ It is intended for small populations only. For \code{N > 15} it becomes very slow. } \examples{ -# All samples of size 2 from a population of 5 U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") -Support(N = 5, n = 2, ID = U) - -# Integer indices only -Support(N = 5, n = 2) +N <- length(U) +n <- 2 +# Ten possible samples of size n=2 +Support(N, n) +# Labeled support +Support(N, n, ID = U) +# Support showing values of y +y <- c(32, 34, 46, 89, 35) +Support(N, n, ID = y) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/SupportRS.rd b/man/SupportRS.rd index 8878a79..8e61d9c 100644 --- a/man/SupportRS.rd +++ b/man/SupportRS.rd @@ -28,9 +28,12 @@ sizes \eqn{n = 1, \ldots, N}. It is only feasible for small populations (\code{N <= 10}) due to exponential growth. } \examples{ -# Complete support for N = 3 -SupportRS(3) -# 2^3 = 8 rows +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) +# Complete support for all sample sizes +SupportRS(N) +# Labeled support +SupportRS(N, ID = U) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/SupportWR.rd b/man/SupportWR.rd index fd750d0..2540389 100644 --- a/man/SupportWR.rd +++ b/man/SupportWR.rd @@ -31,9 +31,14 @@ generate all non-decreasing sequences of length \code{m} from \eqn{\{1, \ldots, N\}}. } \examples{ -# All unordered outcomes: N = 3, m = 2 -SupportWR(N = 3, m = 2) -# choose(3+2-1, 2) = choose(4,2) = 6 rows +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) +m <- 2 +# With-replacement support +SupportWR(N, m) +SupportWR(N, m, ID = U) +y <- c(32, 34, 46, 89, 35) +SupportWR(N, m, ID = y) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/T.SIC.rd b/man/T.SIC.rd index 443eb94..e13e9dc 100644 --- a/man/T.SIC.rd +++ b/man/T.SIC.rd @@ -29,18 +29,34 @@ The output can be passed directly to \code{\link{E.1SI}} or \code{\link{E.SI}} treating each cluster total as an observation. } \examples{ -library(dplyr) -data('BigCity') -UI <- levels(as.factor(BigCity$PSU)) -NI <- length(UI) -nI <- 10 -sam <- S.SI(NI, nI) -sampleI <- UI[sam[sam != 0]] -CityI <- BigCity[BigCity$PSU \%in\% sampleI, ] -y <- data.frame(Income = CityI$Income, - Expenditure = CityI$Expenditure) -cluster <- CityI$PSU -T.SIC(y, cluster) +############ +## Example 1 +############ +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +y1 <- c(32, 34, 46, 89, 35) +y2 <- c(1, 1, 1, 0, 0) +y3 <- cbind(y1, y2) +Cluster <- c("C1", "C2", "C1", "C2", "C1") +T.SIC(y1, Cluster) +T.SIC(y3, Cluster) +############ +## Example 2 - Cluster sampling with Lucy data +############ +data(Lucy) +attach(Lucy) +UI <- c("A", "B", "C", "D", "E") +NI <- length(UI) +nI <- 2 +samI <- S.SI(NI, nI) +dataI <- UI[samI] +Lucy1 <- Lucy[which(Zone == dataI[1]), ] +Lucy2 <- Lucy[which(Zone == dataI[2]), ] +LucyI <- rbind(Lucy1, Lucy2) +attach(LucyI) +Cluster <- as.factor(as.integer(Zone)) +estima <- data.frame(Income, Employees, Taxes) +Ty <- T.SIC(estima, Cluster) +E.SI(NI, nI, Ty) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/VarHT.rd b/man/VarHT.rd index d227e02..20dda7d 100644 --- a/man/VarHT.rd +++ b/man/VarHT.rd @@ -35,11 +35,15 @@ enumerating the full support and is only feasible for small populations (\code{N <= 15}). } \examples{ -N <- 5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +y1 <- c(32, 34, 46, 89, 35) +y2 <- c(1, 1, 1, 0, 0) +N <- length(U) n <- 2 -y <- c(32, 34, 46, 89, 35) p <- c(0.13, 0.2, 0.15, 0.1, 0.15, 0.04, 0.02, 0.06, 0.07, 0.08) -VarHT(y, N, n, p) +# Theoretical variance of the HT estimator +VarHT(y1, N, n, p) +VarHT(y2, N, n, p) } \references{ Horvitz, D.G. and Thompson, D.J. (1952). A generalization of sampling diff --git a/man/Wk.rd b/man/Wk.rd index 84b1a41..25d53d3 100644 --- a/man/Wk.rd +++ b/man/Wk.rd @@ -39,18 +39,163 @@ where \eqn{v_k = 1/(\pi_k c_k)} and \eqn{c_k} is a variance-stabilising constant. The GREG estimator is then \eqn{\hat{t}_{GREG} = \sum_s w_k y_k}. } \examples{ -data('Lucy') +############ +## Example 1 +############ +# Without replacement sampling +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector x is the auxiliary information and y is the variables of interest +x<-c(32, 34, 46, 89, 35) +y<-c(52, 60, 75, 100, 50) +# pik is some vector of inclusion probabilities in the sample +# In this case the sample size is equal to the population size +pik<-rep(1,5) +w1<-Wk(x,tx=236,pik,ck=1,b0=FALSE) +sum(x*w1) +# Draws a sample size without replacement +sam <- sample(5,2) +pik <- c (0.8,0.2,0.2,0.5,0.3) +# The auxiliary information an variable of interest in the selected smaple +x.s<-x[sam] +y.s<-y[sam] +# The vector of inclusion probabilities in the selected smaple +pik.s<-pik[sam] +# Calibration weights under some specifics model +w2<-Wk(x.s,tx=236,pik.s,ck=1,b0=FALSE) +sum(x.s*w2) + +w3<-Wk(x.s,tx=c(5,236),pik.s,ck=1,b0=TRUE) +sum(w3) +sum(x.s*w3) + +w4<-Wk(x.s,tx=c(5,236),pik.s,ck=x.s,b0=TRUE) +sum(w4) +sum(x.s*w4) + +w5<-Wk(x.s,tx=236,pik.s,ck=x.s,b0=FALSE) +sum(x.s*w5) + +###################################################################### +## Example 2: Linear models involving continuous auxiliary information +###################################################################### + +# Draws a simple random sample without replacement +data(Lucy) attach(Lucy) -N <- nrow(Lucy) -n <- 400 -sam <- S.SI(N, n) + +N <- dim(Lucy)[1] +n <- 400 Pik <- rep(n/N, n) -x <- as.matrix(Employees[sam]) -tx <- sum(Employees) -ck <- rep(1, n) -wk <- Wk(x, tx, Pik, ck) -# Check calibration: weighted sum of x equals tx -sum(wk * x) +sam <- S.SI(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) + +########### common ratio model ################### + +estima<-data.frame(Income) +x <- Employees +tx <- sum(Lucy$Employees) +w <- Wk(x, tx, Pik, ck=1, b0=FALSE) +sum(x*w) +tx +# The calibration estimation +colSums(estima*w) + +########### Simple regression model without intercept ################### + +estima<-data.frame(Income, Employees) +x <- Taxes +tx <- sum(Lucy$Taxes) +w<-Wk(x,tx,Pik,ck=x,b0=FALSE) +sum(x*w) +tx +# The calibration estimation +colSums(estima*w) + +########### Multiple regression model without intercept ################### + +estima<-data.frame(Income) +x <- cbind(Employees, Taxes) +tx <- c(sum(Lucy$Employees), sum(Lucy$Taxes)) +w <- Wk(x,tx,Pik,ck=1,b0=FALSE) +sum(x[,1]*w) +sum(x[,2]*w) +tx +# The calibration estimation +colSums(estima*w) + +########### Simple regression model with intercept ################### + +estima<-data.frame(Income, Employees) +x <- Taxes +tx <- c(N,sum(Lucy$Taxes)) +w <- Wk(x,tx,Pik,ck=1,b0=TRUE) +sum(1*w) +sum(x*w) +tx +# The calibration estimation +colSums(estima*w) + +########### Multiple regression model with intercept ################### + +estima<-data.frame(Income) +x <- cbind(Employees, Taxes) +tx <- c(N, sum(Lucy$Employees), sum(Lucy$Taxes)) +w <- Wk(x,tx,Pik,ck=1,b0=TRUE) +sum(1*w) +sum(x[,1]*w) +sum(x[,2]*w) +tx +# The calibration estimation +colSums(estima*w) + +#################################################################### +## Example 3: Linear models involving discrete auxiliary information +#################################################################### + +# Draws a simple random sample without replacement +data(Lucy) +attach(Lucy) + +N <- dim(Lucy)[1] +n <- 400 +sam <- S.SI(N,n) +# The information about the units in the sample is stored in an object called data +data <- Lucy[sam,] +attach(data) +names(data) +# Vector of inclusion probabilities for units in the selected sample +Pik<-rep(n/N,n) +# The auxiliary information is discrete type +Doma<-Domains(Level) + +########### Poststratified common mean model ################### + +estima<-data.frame(Income, Employees, Taxes) +tx <- colSums(Domains(Lucy$Level)) +w <- Wk(Doma,tx,Pik,ck=1,b0=FALSE) +sum(Doma[,1]*w) +sum(Doma[,2]*w) +sum(Doma[,3]*w) +tx +# The calibration estimation +colSums(estima*w) + +########### Poststratified common ratio model ################### + +estima<-data.frame(Income, Employees) +x<-Doma*Taxes +tx <- colSums(Domains(Lucy$Level)) +w <- Wk(x,tx,Pik,ck=1,b0=FALSE) +sum(x[,1]*w) +sum(x[,2]*w) +sum(x[,3]*w) +tx +# The calibration estimation +colSums(estima*w) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992), diff --git a/man/kish_allocation.Rd b/man/kish_allocation.Rd index 061890e..6134330 100644 --- a/man/kish_allocation.Rd +++ b/man/kish_allocation.Rd @@ -9,10 +9,11 @@ kish_allocation(n, N_h, I = 0.5) \arguments{ \item{n}{Integer. Total desired sample size.} -\item{N_h}{Named numeric vector. Population sizes for each stratum \eqn{h = 1, \ldots, H}.} +\item{N_h}{Named numeric vector. Population sizes for each stratum +\eqn{h = 1, \ldots, H}.} -\item{I}{Non-negative numeric. Intraclass correlation coefficient (ICC) or - design effect parameter controlling the allocation: +\item{I}{Non-negative numeric. Intraclass correlation coefficient (ICC) +or design effect parameter controlling the allocation: \itemize{ \item \code{I = 0} → Uniform allocation (equal sample per stratum). \item \code{I = Inf} → Proportional allocation (proportional to \eqn{N_h}). @@ -33,13 +34,13 @@ uniform and proportional allocation through a design effect parameter \code{I}. \details{ The Kish compromise allocation assigns sample sizes as: \deqn{ - n_h = n \cdot \frac{\sqrt{I \, W_h^2 + H^{-2}}}{\sum_{h=1}^{H} \sqrt{I \, W_h^2 + H^{-2}}} + n_h = n \cdot \frac{\sqrt{I \, W_h^2 + H^{-2}}} + {\sum_{h=1}^{H} \sqrt{I \, W_h^2 + H^{-2}}} } -where \eqn{W_h = N_h / N} is the stratum weight and \eqn{H} is the number of strata. - -This formulation nests two classical allocations as limiting cases: -when \eqn{I = 0} the numerator reduces to \eqn{1/H} (uniform), and as -\eqn{I \to \infty} it is dominated by \eqn{W_h} (proportional). +where \eqn{W_h = N_h / N} is the stratum weight and \eqn{H} is the number +of strata. This formulation nests two classical allocations as limiting +cases: when \eqn{I = 0} the numerator reduces to \eqn{1/H} (uniform), +and as \eqn{I \to \infty} it is dominated by \eqn{W_h} (proportional). } \examples{ N_h <- c( @@ -54,18 +55,20 @@ N_h <- c( # Uniform allocation (I = 0) kish_allocation(n = 3096, N_h = N_h, I = 0) -# Proportional allocation (I -> Inf, use a large number) +# Proportional allocation (I -> Inf) kish_allocation(n = 3096, N_h = N_h, I = 1e6) # Kish recommended compromise (I = 0.5) kish_allocation(n = 3096, N_h = N_h, I = 0.5) - } \references{ Kish, L. (1992). Weighting for unequal \eqn{P_i}. \emph{Journal of Official Statistics}, 8(2), 183–200. } \seealso{ -\code{\link[TeachingSampling]{S.STSI}} for stratified simple random sampling, -\code{\link[TeachingSampling]{S.STPPS}} for stratified PPS sampling. +\code{\link{E.STSI}} for estimation under stratified sampling, +\code{\link{S.STSI}} for stratified simple random sampling. +} +\author{ +Yury Vanessa Ochoa Montes } diff --git a/man/nk.rd b/man/nk.rd index c1926ce..d5f451a 100644 --- a/man/nk.rd +++ b/man/nk.rd @@ -28,9 +28,10 @@ this function records how many times each unit was selected. This is needed for with-replacement estimators based on selection frequencies. } \examples{ -# Frequency matrix: N = 3 units, m = 2 draws -N <- 3 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +N <- length(U) m <- 2 +# Frequency matrix for with-replacement sampling nk(N, m) } \references{ diff --git a/man/p.WR.rd b/man/p.WR.rd index 29bc131..d3bd638 100644 --- a/man/p.WR.rd +++ b/man/p.WR.rd @@ -31,12 +31,40 @@ where \eqn{n_k} is the number of times unit \eqn{k} appears in outcome \eqn{s} and \eqn{p_k} is the selection probability of unit \eqn{k}. } \examples{ -# N = 3 units, m = 2 draws, equal probabilities -N <- 3 -m <- 2 -pk <- c(1/3, 1/3, 1/3) -p <- p.WR(N, m, pk) -sum(p) # must equal 1 +############ +## Example 1 +############ +# With replacement simple random sampling +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector pk is the sel?ection probability of the units in the finite population +pk <- c(0.2, 0.2, 0.2, 0.2, 0.2) +sum(pk) +N <- length(pk) +m <- 3 +# The smapling design +p <- p.WR(N, m, pk) +p +sum(p) + +############ +## Example 2 +############ +# With replacement PPS random sampling +# Vector U contains the label of a population of size N=5 +U <- c("Yves", "Ken", "Erik", "Sharon", "Leslie") +# Vector x is the auxiliary information and y is the variables of interest +x<-c(32, 34, 46, 89, 35) +y<-c(52, 60, 75, 100, 50) +# Vector pk is the sel?ection probability of the units in the finite population +pk <- x/sum(x) +sum(pk) +N <- length(pk) +m <- 3 +# The smapling design +p <- p.WR(N, m, pk) +p +sum(p) } \references{ Sarndal, C-E. and Swensson, B. and Wretman, J. (1992),