ALStructure-manuscript/refs.bib at master · StoreyLab/ALStructure-manuscript · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
@article{psd,
  author  = {Pritchard, J.~K. and Stephens, M. and Donnelly, P.},
  title   = {Inference of Population Structure Using Multilocus Genotype Data},
  journal = {Genetics},
  year    = {2000},
  volume  = {155},
  number  = {2},
  pages   = {945--959}
}
@article{admixture,
  author  = {Alexander, D.~H. and Novembre, J. and Lange, K.},
  title   = {Fast model-based estimation of ancestry in unrelated individuals},
  journal = {Genome Research},
  year    = {2009},
  volume  = {19},
  number  = {9},
  pages   = {1655--1664}
}
@article{tera,
  author  = {Gopalan, P. and Hao, W. and Blei, D.~M. and Storey, J.~D.},
  title   = {Scaling Probabilistic Models of Genetic Variation to Millions of Humans},
  journal = {Nature Genetics},
  year    = {2016},
  volume  = {48},
  number  = {12},
  pages   = {1587--1592}
}
@article{fast,
  author    = {Raj, Anil and Stephens, Matthew and Pritchard, J.~K.},
  title     = {{fastSTRUCTURE}: Variational Inference of Population Structure in Large {SNP} Data Sets},
  journal   = {Genetics},
  year      = {2014},
  volume    = {197},
  number    = {2},
  pages     = {573--589},
  doi       = {10.1534/genetics.114.164350},
  issn      = {0016-6731},
  publisher = {Genetics},
  url       = {http://www.genetics.org/content/197/2/573},
  eprint    = {http://www.genetics.org/content/197/2/573.full.pdf},
  abstract  = {Tools for estimating population structure from genetic data are now used in a wide variety of applications in population genetics. However, inferring population structure in large modern data sets imposes severe computational challenges. Here, we develop efficient algorithms for approximate inference of the model underlying the STRUCTURE program using a variational Bayesian framework. Variational methods pose the problem of computing relevant posterior distributions as an optimization problem, allowing us to build on recent advances in optimization theory to develop fast inference tools. In addition, we propose useful heuristic scores to identify the number of populations represented in a data set and a new hierarchical prior to detect weak population structure in the data. We test the variational algorithms on simulated data and illustrate using genotype data from the CEPH{\textendash}Human Genome Diversity Panel. The variational algorithms are almost two orders of magnitude faster than STRUCTURE and achieve accuracies comparable to those of ADMIXTURE. Furthermore, our results show that the heuristic scores for choosing model complexity provide a reasonable range of values for the number of populations represented in the data, with minimal bias toward detecting structure when it is very weak. Our algorithm, fastSTRUCTURE, is freely available online at http://pritchardlab.stanford.edu/structure.html.}
}

@article{chen,
  author        = {Chen, Xiongzhi and Storey, John D.},
  title         = {Consistent Estimation of Low-Dimensional Latent Structure in High-Dimensional Data},
  year          = {2015},
  journal       = {arXiv},
  volume        = {1510.03497},
  archivePrefix = {arXiv},
  eprint        = {1510.03497},
  url           = {https://arxiv.org/abs/1510.03497}
}

@article{berry,
  author  = {Berry, M.~W. and Browne, M. and Langville, A.~N. and Pauca, V.~P. and Plemmons, R.~J.},
  title   = {Algorithms and Applications for Approximate Nonnegative Matrix Factorization},
  journal = {Computational Statistics \& Data Analysis},
  year    = {2007},
  volume  = {52},
  pages   = {155--173}
}

@book{B_V,
  title     = {Convex Optimization},
  author    = {Boyd, S. and Vandenberghe, L.},
  year      = {2009},
  publisher = {Cambridge University Press}
}

@article{G_S,
  author  = {Grippo, L. and Sciandrone, M.},
  title   = {On the Convergence of the Block Nonlinear {Gauss-Seidel} Method Under Convex Constraints},
  journal = {Operations Research Letters},
  year    = {2000},
  volume  = {26},
  number  = {3},
  pages   = {127--136}
}

@book{bartholomew,
  title     = {Latent Variable Models and Factor Analysis},
  author    = {Bartholomew, D.~J.},
  year      = {1987},
  publisher = {Oxford University Press}
}

@article{Alex,
  author    = {Ochoa, Alejandro and Storey, J.~D.},
  title     = {{$\text{F}_{\text{ST}}$} and kinship for arbitrary population structures {II}: Method of moments estimators},
  journal   = {bioRxiv},
  year      = {2016},
  doi       = {10.1101/083923},
  publisher = {Cold Spring Harbor Laboratory},
  url       = {https://www.biorxiv.org/content/early/2016/10/27/083923},
  eprint    = {https://www.biorxiv.org/content/early/2016/10/27/083923.full.pdf},
  abstract  = {FST and kinship are key parameters often estimated in modern population genetics studies. Kinship matrices have also become a fundamental quantity used in genome-wide association studies and heritability estimation. The most frequently used estimators of FST and kinship are method of moments estimators whose accuracies depend strongly on the existence of simple underlying forms of structure, such as the island model of non-overlapping, independently evolving subpopulations. However, modern data sets have revealed that these simple models of structure do not likely hold in many populations, including humans. In this work, we provide new results on the behavior of these estimators in the presence of arbitrarily complex population structures. After establishing a framework for assessing bias and consistency of genome-wide estimators, we calculate the accuracy of FST and kinship estimators under arbitrary population structures, characterizing biases and estimation challenges unobserved under their originally assumed models of structure. We illustrate our results using simulated genotypes from an admixture model, constructing a one-dimensional geographic scenario that departs nontrivially from the island model. Using 1000 Genomes Project data, we verify that population-level pairwise FST estimates underestimate differentiation measured by an individual-level pairwise FST estimator introduced here. We show that the calculated biases are due to unknown quantities that cannot be estimated under the established frameworks, highlighting the need for innovative estimation approaches in complex populations. We provide initial results that point towards a future estimation framework for generalized FST and kinship.}
}


@article{TGP,
  author  = {{The 1000 Genomes Project Consortium}},
  title   = {A global reference for human genetic variation},
  journal = {Nature},
  year    = {2015},
  volume  = {526},
  pages   = {68--74}
}

@article{HGDP,
  author  = {Cavalli-Sforza, L.~L.},
  title   = {The {Human Genome Diversity Project}: past, present and future},
  journal = {Nature Reviews Genetics},
  year    = {2005},
  volume  = {6},
  pages   = {333--340}
}

@article{HO,
  author  = {Lazaridis, I. and others},
  title   = {Ancient human genomes suggest three ancestral populations for present-day {Europeans}},
  journal = {Nature},
  year    = {2014},
  volume  = {513},
  pages   = {409--413}
}

@article{LFA,
  author  = {Hao, W. and Song, M. and Storey, J.~D.},
  title   = {Probabilistic models of genetic variation in structured populations applied to global human studies},
  journal = {Bioinformatics},
  year    = {2016},
  volume  = {32},
  number  = {5},
  pages   = {713--721}
}

@article{knowler,
  author  = {Knowler, W.~C. and Williams, R.~C. and Pettitt, D.~J. and Steinberg, A.~G.},
  title   = {{Gm3;5,13,14} and type 2 diabetes mellitus: an association in {American Indians} with genetic admixture},
  journal = {The American Journal of Human Genetics},
  year    = {1988},
  volume  = {43},
  number  = {4},
  pages   = {520--526}
}

@article{marchini,
  author  = {Marchini, J. and Cardon, L.~R. and Phillips, M.~S. and Donnelly, P.},
  title   = {The effects of human population structure on large genetic association studies},
  journal = {Nature Genetics},
  year    = {2004},
  volume  = {36},
  number  = {5},
  pages   = {512--517}
}

@article{SHS,
  author  = {Song, M. and Hao, W. and Storey, J.~D.},
  title   = {Testing for genetic associations in arbitrarily structured populations},
  journal = {Nature Genetics},
  year    = {2015},
  volume  = {47},
  number  = {5},
  pages   = {550--554}
}

@article{akey_africa,
  author  = {Parra, E.~J. and Marcini, A. and Akey, J. and Martinson, J. and Batzer, M.~A. and Cooper, R. and Forrester, T. and Allison, D.~B. and Deka, R. and Ferrell, R.~E. and Shriver, M.~D.},
  title   = {Estimating {African American} Admixture Proportions by Use of Population-Specific Alleles},
  journal = {The American Journal of Human Genetics},
  year    = {1998},
  volume  = {63},
  number  = {6},
  pages   = {1839--1851}
}

@article{sforza,
  author  = {Cavalli-Sforza, L.~L. and Piazza, A. and Menozzi, P. and Mountain, J.},
  title   = {Reconstruction of human evolution: Bringing together genetic, archaeological, and linguistic data},
  journal = {Proceedings of the National Academy of Sciences},
  year    = {1988},
  volume  = {85},
  number  = {16},
  pages   = {6002--6006}
}

@article{Li1100,
  author    = {Li, J.~Z. and Absher, D.~M. and Tang, Hua and Southwick, A.~M. and Casto, A.~M. and Ramachandran, Sohini and Cann, H.~M. and Barsh, Gregory S. and Feldman, Marcus and Cavalli-Sforza, L.~L. and Myers, R.~M.},
  title     = {Worldwide Human Relationships Inferred from Genome-Wide Patterns of Variation},
  journal   = {Science},
  year      = {2008},
  volume    = {319},
  number    = {5866},
  pages     = {1100--1104},
  doi       = {10.1126/science.1153717},
  issn      = {0036-8075},
  publisher = {American Association for the Advancement of Science},
  url       = {http://science.sciencemag.org/content/319/5866/1100},
  eprint    = {http://science.sciencemag.org/content/319/5866/1100.full.pdf},
  abstract  = {Human genetic diversity is shaped by both demographic and biological factors and has fundamental implications for understanding the genetic basis of diseases. We studied 938 unrelated individuals from 51 populations of the Human Genome Diversity Panel at 650,000 common single-nucleotide polymorphism loci. Individual ancestry and population substructure were detectable with very high resolution. The relationship between haplotype heterozygosity and geography was consistent with the hypothesis of a serial founder effect with a single origin in sub-Saharan Africa. In addition, we observed a pattern of ancestral allele frequency distributions that reflects variation in population dynamics among geographic regions. This data set allows the most comprehensive characterization to date of human genetic variation.}
}

@article{frappe,
  author  = {Tang, H. and Peng, J. and Wang, P. and Risch, N.},
  title   = {Estimation of individual admixture: Analytical and study design considerations},
  journal = {Genetic Epidemiology},
  year    = {2005},
  volume  = {28},
  number  = {4},
  pages   = {289--301}
}

@article{eigen1,
  title   = {Population Structure and Eigenanalysis},
  author  = {Patterson, N. and Price, A. and Reich, D.},
  journal = {PLoS Genetics},
  volume  = {2},
  number  = {12},
  pages   = {e190},
  year    = {2006}
}

@article{eigen2,
  author  = {Price, A. and Patterson, N. and Plenge, R. and Weinblatt, M. and Shadick, N. and Reich, D.},
  title   = {Principal components analysis corrects for stratification in genome-wide association studies},
  journal = {Nature Genetics},
  year    = {2006},
  volume  = {38},
  number  = {8},
  pages   = {904--909}
}

@article{als_orig,
  author  = {Paatero, P. and Tapper, U.},
  title   = {Positive matrix factorization: a non-negative factor model with optimal utilization of error estimates of data values},
  journal = {Environmetrics},
  year    = {1994},
  volume  = {5},
  number  = {2},
  pages   = {111--126}
}

@article{BN,
  author  = {Balding, D.~J. and Nichols, R.~A.},
  title   = {A method for quantifying differentiation between populations at multi-allelic loci and its implications for investigating identity and paternity},
  journal = {Genetica},
  year    = {1995},
  volume  = {96},
  number  = {1--2},
  pages   = {3--12}
}

@article{FST,
  author  = {Weir, B.~S. and Cockerham, C.~C.},
  title   = {Estimating {F}-Statistics for the Analysis of Population Structure},
  journal = {Evolution},
  year    = {1984},
  volume  = {38},
  number  = {6},
  pages   = {1358--1370}
}

@inproceedings{arora_anchor,
  title     = {A Practical Algorithm for Topic Modeling with Provable Guarantees},
  author    = {Sanjeev Arora and Rong Ge and Yonatan Halpern and David Mimno and Ankur Moitra and David Sontag and Yichen Wu and Michael Zhu},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning},
  pages     = {280--288},
  year      = {2013},
  editor    = {Sanjoy Dasgupta and David McAllester},
  volume    = {28},
  number    = {2},
  series    = {Proceedings of Machine Learning Research},
  address   = {Atlanta, Georgia, USA},
  month     = jun,
  eventdate = {2013-06-17/2013-06-19},
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v28/arora13.pdf},
  url       = {http://proceedings.mlr.press/v28/arora13.html},
  abstract  = {Topic models provide a useful method for dimensionality reduction and exploratory data analysis in large text corpora. Most approaches to topic model learning have been based on a maximum likelihood objective. Efficient algorithms exist that attempt to approximate this objective, but they have no provable guarantees. Recently, algorithms have been introduced that provide provable bounds, but these algorithms are not practical because they are inefficient and not robust to violations of model assumptions. In this paper we present an algorithm for learning topic models that is both provable and practical. The algorithm produces results comparable to the best MCMC implementations while running orders of magnitude faster.}
}

@article{projsplx,
  author        = {Chen, Y. and Ye, X.},
  title         = {Projection onto a Simplex},
  year          = {2011},
  journal       = {arXiv},
  volume        = {1101.6081},
  archivePrefix = {arXiv},
  eprint        = {1101.6081},
  url           = {https://arxiv.org/abs/1101.6081}
}

@article{zheng_weir,
  author  = {Zheng, X. and Weir, B.~S.},
  title   = {Eigenanalysis of {SNP} data with an identity by descent interpretation},
  journal = {Theoretical Population Biology},
  year    = {2016},
  volume  = {107},
  pages   = {65--76}
}

@article{pcadmix,
  author  = {Brisbin, Abra and Bryc, Katarzyna and Byrnes, Jake and Zakharia, Fouad and Omberg, Larsson and Degenhardt, Jeremiah and Reynolds, Andrew and Ostrer, Harry and Mezey, J.~G. and Bustamante, C.~D.},
  title   = {{PCAdmix}: Principal Components-Based Assignment of Ancestry along Each Chromosome in Individuals with Admixed Ancestry from Two or More Populations},
  journal = {Human Biology},
  year    = {2012},
  volume  = {84},
  number  = {4},
  pages   = {343--364}
}

@book{jolliffe2002principal,
  author    = {Jolliffe, I.~T.},
  title     = {Principal Component Analysis},
  publisher = {Springer Verlag},
  year      = {2002},
  isbn      = {0387954422}
}

@article{stephens_engelhardt,
  author    = {Engelhardt, Barbara E. and Stephens, Matthew},
  title     = {Analysis of Population Structure: A Unifying Framework and Novel Methods Based on Sparse Factor Analysis},
  journal   = {PLOS Genetics},
  publisher = {Public Library of Science},
  year      = {2010},
  month     = sep,
  volume    = {6},
  number    = {9},
  pages     = {1--12},
  doi       = {10.1371/journal.pgen.1001117},
  url       = {https://doi.org/10.1371/journal.pgen.1001117},
  abstract  = {Author Summary Two different approaches have become widely used in the analysis of population structure: admixture-based models and principal components analysis (PCA). In admixture-based models each individual is assumed to have inherited some proportion of its ancestry from one of several distinct populations. PCA projects the individuals into a low-dimensional subspace. On the face of it, these methods seem to have little in common. Here we show how in fact both of these methods can be viewed within a single unifying framework. This viewpoint should help practitioners to better interpret and contrast the results from these methods in real data applications. It also provides a springboard to the development of novel approaches to this problem. We introduce one such novel approach, based on sparse factor analysis, which has elements in common with both admixture-based models and PCA. As we illustrate here, in some settings sparse factor analysis may provide more interpretable results than either admixture-based models or PCA.}
}

@article{basu,
  author    = {Basu, Analabha and Sarkar-Roy, Neeta and Majumder, Partha P.},
  title     = {Genomic reconstruction of the history of extant populations of {India} reveals five distinct ancestral components and a complex structure},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {2016},
  volume    = {113},
  number    = {6},
  pages     = {1594--1599},
  doi       = {10.1073/pnas.1513197113},
  issn      = {0027-8424},
  publisher = {National Academy of Sciences},
  url       = {http://www.pnas.org/content/113/6/1594},
  eprint    = {http://www.pnas.org/content/113/6/1594.full.pdf},
  abstract  = {India, harboring more than one-sixth of the world population, has been underrepresented in genome-wide studies of variation. Our analysis reveals that there are four dominant ancestries in mainland populations of India, contrary to two ancestries inferred earlier. We also show that (i) there is a distinctive ancestry of the Andaman and Nicobar Islands populations that is likely ancestral also to Oceanic populations, and (ii) the extant mainland populations admixed widely irrespective of ancestry, which was rapidly replaced by endogamy, particularly among Indo-European{\textendash}speaking upper castes, about 70 generations ago. This coincides with the historical period of formulation and adoption of some relevant sociocultural norms.India, occupying the center stage of Paleolithic and Neolithic migrations, has been underrepresented in genome-wide studies of variation. Systematic analysis of genome-wide data, using multiple robust statistical methods, on (i) 367 unrelated individuals drawn from 18 mainland and 2 island (Andaman and Nicobar Islands) populations selected to represent geographic, linguistic, and ethnic diversities, and (ii) individuals from populations represented in the Human Genome Diversity Panel (HGDP), reveal four major ancestries in mainland India. This contrasts with an earlier inference of two ancestries based on limited population sampling. A distinct ancestry of the populations of Andaman archipelago was identified and found to be coancestral to Oceanic populations. Analysis of ancestral haplotype blocks revealed that extant mainland populations (i) admixed widely irrespective of ancestry, although admixtures between populations was not always symmetric, and (ii) this practice was rapidly replaced by endogamy about 70 generations ago, among upper castes and Indo-European speakers predominantly. 
This estimated time coincides with the historical period of formulation and adoption of sociocultural norms restricting intermarriage in large social strata. A similar replacement observed among tribal populations was temporally less uniform.}
}

@article{tipping_bishop,
  author   = {Tipping, Michael E. and Bishop, Christopher M.},
  title    = {Probabilistic Principal Component Analysis},
  journal  = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume   = {61},
  number   = {3},
  pages    = {611--622},
  year     = {1999},
  keywords = {Density estimation, EM algorithm, Gaussian mixtures, Maximum likelihood, Principal component analysis, Probability model},
  doi      = {10.1111/1467-9868.00196},
  url      = {https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/1467-9868.00196},
  eprint   = {https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/1467-9868.00196},
  abstract = {Principal component analysis (PCA) is a ubiquitous technique for data analysis and processing, but one which is not based on a probability model. We demonstrate how the principal axes of a set of observed data vectors may be determined through maximum likelihood estimation of parameters in a latent variable model that is closely related to factor analysis. We consider the properties of the associated likelihood function, giving an EM algorithm for estimating the principal subspace iteratively, and discuss, with illustrative examples, the advantages conveyed by this probabilistic approach to PCA.}
}

@article{shwe,
  author    = {Hao, Wei and Storey, John D.},
  title     = {Extending Tests of {Hardy-Weinberg} Equilibrium to Structured Populations},
  year      = {2017},
  journal   = {bioRxiv},
  volume    = {doi:10.1101/240804},
  doi       = {10.1101/240804},
  publisher = {Cold Spring Harbor Laboratory},
  url       = {https://doi.org/10.1101/240804}
}

@article{Kim2014,
  author   = {Kim, Jingu and He, Yunlong and Park, Haesun},
  title    = {Algorithms for nonnegative matrix and tensor factorizations: a unified view based on block coordinate descent framework},
  journal  = {Journal of Global Optimization},
  year     = {2014},
  month    = feb,
  day      = {01},
  volume   = {58},
  number   = {2},
  pages    = {285--319},
  abstract = {We review algorithms developed for nonnegative matrix factorization (NMF) and nonnegative tensor factorization (NTF) from a unified view based on the block coordinate descent (BCD) framework. NMF and NTF are low-rank approximation methods for matrices and tensors in which the low-rank factors are constrained to have only nonnegative elements. The nonnegativity constraints have been shown to enable natural interpretations and allow better solutions in numerous applications including text analysis, computer vision, and bioinformatics. However, the computation of NMF and NTF remains challenging and expensive due the constraints. Numerous algorithmic approaches have been proposed to efficiently compute NMF and NTF. The BCD framework in constrained non-linear optimization readily explains the theoretical convergence properties of several efficient NMF and NTF algorithms, which are consistent with experimental observations reported in literature. In addition, we discuss algorithms that do not fit in the BCD framework contrasting them from those based on the BCD framework. With insights acquired from the unified perspective, we also propose efficient algorithms for updating NMF when there is a small change in the reduced dimension or in the data. The effectiveness of the proposed updating algorithms are validated experimentally with synthetic and real-world data sets.},
  issn     = {1573-2916},
  doi      = {10.1007/s10898-013-0035-4},
  url      = {https://doi.org/10.1007/s10898-013-0035-4}
}

@article{Gillis:2012:AMU:2185782.2185791,
  author     = {Gillis, Nicolas and Glineur, Fran{\c{c}}ois},
  title      = {Accelerated Multiplicative Updates and Hierarchical {ALS} Algorithms for Nonnegative Matrix Factorization},
  journal    = {Neural Computation},
  issue_date = {April 2012},
  volume     = {24},
  number     = {4},
  month      = apr,
  year       = {2012},
  issn       = {0899-7667},
  pages      = {1085--1105},
  numpages   = {21},
  url        = {http://dx.doi.org/10.1162/NECO_a_00256},
  doi        = {10.1162/NECO_a_00256},
  acmid      = {2185791},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA}
}

@article{lee_seung,
  author  = {Lee, Daniel~D. and Seung, H.~Sebastian},
  title   = {Learning the parts of objects by non-negative matrix factorization},
  journal = {Nature},
  year    = {1999},
  volume  = {401},
  number  = {6755},
  pages   = {788--791},
  doi     = {10.1038/44565},
  url     = {https://www.nature.com/articles/44565}
}

@inproceedings{10.1007/978-3-540-74494-8_22,
  author    = {Cichocki, Andrzej and Zdunek, Rafal and Amari, Shun-ichi},
  editor    = {Davies, Mike E. and James, Christopher J. and Abdallah, Samer A. and Plumbley, Mark D.},
  title     = {Hierarchical {ALS} Algorithms for Nonnegative Matrix and {3D} Tensor Factorization},
  booktitle = {Independent Component Analysis and Signal Separation},
  year      = {2007},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {169--176},
  abstract  = {In the paper we present new Alternating Least Squares (ALS) algorithms for Nonnegative Matrix Factorization (NMF) and their extensions to 3D Nonnegative Tensor Factorization (NTF) that are robust in the presence of noise and have many potential applications, including multi-way Blind Source Separation (BSS), multi-sensory or multi-dimensional data analysis, and nonnegative neural sparse coding. We propose to use local cost functions whose simultaneous or sequential (one by one) minimization leads to a very simple ALS algorithm which works under some sparsity constraints both for an under-determined (a system which has less sensors than sources) and over-determined model. The extensive experimental results confirm the validity and high performance of the developed algorithms, especially with usage of the multi-layer hierarchical NMF. Extension of the proposed algorithm to multidimensional Sparse Component Analysis and Smooth Component Analysis is also proposed.},
  isbn      = {978-3-540-74494-8}
}

@article{lawson1,
  author    = {Lawson, Daniel John and Hellenthal, Garrett and Myers, Simon and Falush, Daniel},
  title     = {Inference of Population Structure using Dense Haplotype Data},
  journal   = {PLOS Genetics},
  publisher = {Public Library of Science},
  year      = {2012},
  month     = jan,
  volume    = {8},
  number    = {1},
  pages     = {1--16},
  doi       = {10.1371/journal.pgen.1002453},
  url       = {https://doi.org/10.1371/journal.pgen.1002453},
  abstract  = {Author Summary The first step in almost every genetic analysis is to establish how sample members are related to each other. High relatedness between individuals can arise if they share a small number of recent ancestors, e.g. if they are distant cousins or a larger number of more distant ones, e.g. if their ancestors come from the same region. The most popular methods for investigating these relationships analyse successive markers independently, simply adding the information they provide. This works well for studies involving hundreds of markers scattered around the genome but is less appropriate now that entire genomes can be sequenced. We describe a ``chromosome painting'' approach to characterising shared ancestry that takes into account the fact that DNA is transmitted from generation to generation as a linear molecule in chromosomes. We show that the approach increases resolution relative to previous techniques, allowing differences in ancestry profiles among individuals to be resolved at the finest scales yet. We provide mathematical, statistical, and graphical machinery to exploit this new information and to characterize relationships at continental, regional, local, and family scales.}
}

@article{lawson2,
  author  = {Lawson, Daniel John and van Dorp, Lucy and Falush, Daniel},
  title   = {A tutorial on how not to over-interpret {STRUCTURE} and {ADMIXTURE} bar plots},
  journal = {Nature Communications},
  year    = {2018},
  month   = aug,
  volume  = {9},
  number  = {1},
  pages   = {3258},
  doi     = {10.1038/s41467-018-05257-7}
}

@article{topic_model,
  author        = {{Ke}, Zheng Tracy and {Wang}, Minzhe},
  title         = {A New {SVD} Approach to Optimal Topic Estimation},
  journal       = {arXiv e-prints},
  keywords      = {Statistics - Methodology, 62H12, 62H25, 62C20, 62P25},
  year          = {2017},
  month         = apr,
  eid           = {arXiv:1704.07016},
  pages         = {arXiv:1704.07016},
  archivePrefix = {arXiv},
  eprint        = {1704.07016},
  primaryClass  = {stat.ME},
  adsurl        = {https://ui.adsabs.harvard.edu/\#abs/2017arXiv170407016K},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System}
}

@article{mmsbm,
  author        = {{Rubin-Delanchy}, Patrick and {Priebe}, Carey E. and {Tang}, Minh},
  title         = {Consistency of Adjacency Spectral Embedding for the Mixed Membership Stochastic Blockmodel},
  journal       = {arXiv e-prints},
  keywords      = {Statistics - Methodology},
  year          = {2017},
  month         = may,
  eid           = {arXiv:1705.04518},
  pages         = {arXiv:1705.04518},
  archivePrefix = {arXiv},
  eprint        = {1705.04518},
  primaryClass  = {stat.ME},
  adsurl        = {https://ui.adsabs.harvard.edu/\#abs/2017arXiv170504518R},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System}
}

@book{efron_2013,
  author    = {Efron, Bradley},
  title     = {Large-Scale Inference: Empirical {Bayes} Methods for Estimation, Testing, and Prediction},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {2013}
}

@inproceedings{stein1956,
  author    = {Stein, Charles},
  title     = {Inadmissibility of the Usual Estimator for the Mean of a Multivariate Normal Distribution},
  booktitle = {Proceedings of the Third Berkeley Symposium on Mathematical Statistics and Probability, Volume 1: Contributions to the Theory of Statistics},
  year      = {1956},
  pages     = {197--206},
  publisher = {University of California Press},
  address   = {Berkeley, Calif.},
  url       = {https://projecteuclid.org/euclid.bsmsp/1200501656}
}