@incollection{Bengio+chapter2007,
author = {Bengio, Yoshua and LeCun, Yann},
booktitle = {Large Scale Kernel Machines},
publisher = {MIT Press},
title = {Scaling Learning Algorithms Towards {AI}},
year = {2007}
}

@article{Hinton06,
author = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee Whye},
journal = {Neural Computation},
volume = {18},
number = {7},
pages = {1527--1554},
title = {A Fast Learning Algorithm for Deep Belief Nets},
year = {2006}
}

@book{goodfellow2016deep,
title={Deep learning},
author={Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
volume={1},
year={2016},
publisher={MIT Press}
}

@inproceedings{Matthey2016vaeLB,
  title={$\beta$-{VAE}: Learning Basic Visual Concepts with a Constrained Variational Framework},
  author={Irina Higgins and Loic Matthey and Arka Pal and Christopher Burgess and Xavier Glorot and Matthew Botvinick and Shakir Mohamed and Alexander Lerchner},
  booktitle={International Conference on Learning Representations},
  year={2017}
}

@article{Doersch2016,
abstract = {In just three years, Variational Autoencoders (VAEs) have emerged as one of the most popular approaches to unsupervised learning of complicated distributions. VAEs are appealing because they are built on top of standard function approximators (neural networks), and can be trained with stochastic gradient descent. VAEs have already shown promise in generating many kinds of complicated data, including handwritten digits, faces, house numbers, CIFAR images, physical models of scenes, segmentation, and predicting the future from static images. This tutorial introduces the intuitions behind VAEs, explains the mathematics behind them, and describes some empirical behavior. No prior knowledge of variational Bayesian methods is assumed.},
archivePrefix = {arXiv},
arxivId = {1606.05908},
author = {Doersch, Carl},
eprint = {1606.05908},
month = {jun},
title = {{Tutorial on Variational Autoencoders}},
url = {http://arxiv.org/abs/1606.05908},
year = {2016}
}

@article{yeung2016epitomic,
  title={Epitomic Variational Autoencoders},
  author={Yeung, Serena and Kannan, Anitha and Dauphin, Yann and Fei-Fei, Li},
  year={2016}
}

@inproceedings{titsias2011spike,
  title={Spike and slab variational inference for multi-task and multiple kernel learning},
  author={Titsias, Michalis K and L{\'a}zaro-Gredilla, Miguel},
  booktitle={Advances in Neural Information Processing Systems},
  pages={2339--2347},
  year={2011}
}

@article{ishwaran2005spike,
  title={Spike and slab variable selection: frequentist and Bayesian strategies},
  author={Ishwaran, Hemant and Rao, J Sunil and others},
  journal={The Annals of Statistics},
  volume={33},
  number={2},
  pages={730--773},
  year={2005},
  publisher={Institute of Mathematical Statistics}
}

@article{bengio2013representation,
  title={Representation learning: A review and new perspectives},
  author={Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume={35},
  number={8},
  pages={1798--1828},
  year={2013},
  publisher={IEEE}
}

@article{Kim2018,
abstract = {We define and address the problem of unsupervised learning of disentangled representations on data generated from independent factors of variation. We propose FactorVAE, a method that disentangles by encouraging the distribution of representations to be factorial and hence independent across the dimensions. We show that it improves upon $\beta$-VAE by providing a better trade-off between disentanglement and reconstruction quality. Moreover, we highlight the problems of a commonly used disentanglement metric and introduce a new metric that does not suffer from them.},
archivePrefix = {arXiv},
arxivId = {1802.05983},
author = {Kim, Hyunjik and Mnih, Andriy},
eprint = {1802.05983},
month = {feb},
title = {{Disentangling by Factorising}},
url = {http://arxiv.org/abs/1802.05983},
year = {2018}
}

@article{Nalisnick2016,
abstract = {We extend Stochastic Gradient Variational Bayes to perform posterior inference for the weights of Stick-Breaking processes. This development allows us to define a Stick-Breaking Variational Autoencoder (SB-VAE), a Bayesian nonparametric version of the variational autoencoder that has a latent representation with stochastic dimensionality. We experimentally demonstrate that the SB-VAE, and a semi-supervised variant, learn highly discriminative latent representations that often outperform the Gaussian VAE's.},
archivePrefix = {arXiv},
arxivId = {1605.06197},
author = {Nalisnick, Eric and Smyth, Padhraic},
eprint = {1605.06197},
month = {may},
title = {{Stick-Breaking Variational Autoencoders}},
url = {http://arxiv.org/abs/1605.06197},
year = {2016}
}

@article{Kingma2013,
archivePrefix = {arXiv},
arxivId = {1312.6114},
author = {Kingma, Diederik P and Welling, Max},
eprint = {1312.6114},
month = {dec},
title = {{Auto-Encoding Variational Bayes}},
url = {https://arxiv.org/abs/1312.6114},
year = {2013}
}

@article{Burgess2018,
abstract = {We present new intuitions and theoretical assessments of the emergence of disentangled representation in variational autoencoders. Taking a rate-distortion theory perspective, we show the circumstances under which representations aligned with the underlying generative factors of variation of data emerge when optimising the modified ELBO bound in $\beta$-VAE, as training progresses. From these insights, we propose a modification to the training regime of $\beta$-VAE, that progressively increases the information capacity of the latent code during training. This modification facilitates the robust learning of disentangled representations in $\beta$-VAE, without the previous trade-off in reconstruction accuracy.},
archivePrefix = {arXiv},
arxivId = {1804.03599},
author = {Burgess, Christopher P. and Higgins, Irina and Pal, Arka and Matthey, Loic and Watters, Nick and Desjardins, Guillaume and Lerchner, Alexander},
eprint = {1804.03599},
month = {apr},
title = {{Understanding disentangling in $\beta$-VAE}},
url = {http://arxiv.org/abs/1804.03599},
year = {2018}
}

@article{kingma2014adam,
  title={Adam: A method for stochastic optimization},
  author={Kingma, Diederik P and Ba, Jimmy},
  journal={arXiv preprint arXiv:1412.6980},
  year={2014}
}

@inproceedings{paszke2017automatic,
  title={Automatic differentiation in {PyTorch}},
  author={Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam},
  booktitle={NIPS-W},
  year={2017}
}

@inproceedings{liu2015deep,
  title={Deep learning face attributes in the wild},
  author={Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  booktitle={Proceedings of the IEEE International Conference on Computer Vision},
  pages={3730--3738},
  year={2015}
}

@inproceedings{higgins2016beta,
  title={$\beta$-{VAE}: Learning basic visual concepts with a constrained variational framework},
  author={Higgins, Irina and Matthey, Loic and Pal, Arka and Burgess, Christopher and Glorot, Xavier and Botvinick, Matthew and Mohamed, Shakir and Lerchner, Alexander},
  booktitle={International Conference on Learning Representations},
  year={2017}
}

@inproceedings{van2017neural,
  title={Neural discrete representation learning},
  author={van den Oord, Aaron and Vinyals, Oriol and others},
  booktitle={Advances in Neural Information Processing Systems},
  pages={6306--6315},
  year={2017}
}

@article{kusner2017grammar,
  title={Grammar variational autoencoder},
  author={Kusner, Matt J and Paige, Brooks and Hern{\'a}ndez-Lobato, Jos{\'e} Miguel},
  journal={arXiv preprint arXiv:1703.01925},
  year={2017}
}

@article{salimans2016structured,
  title={A Structured Variational Auto-encoder for Learning Deep Hierarchies of Sparse Features},
  author={Salimans, Tim},
  journal={arXiv preprint arXiv:1602.08734},
  year={2016}
}

@incollection{NIPS2015_5666,
title = {Variational Dropout and the Local Reparameterization Trick},
author = {Kingma, Durk P and Salimans, Tim and Welling, Max},
booktitle = {Advances in Neural Information Processing Systems 28},
editor = {C. Cortes and N. D. Lawrence and D. D. Lee and M. Sugiyama and R. Garnett},
pages = {2575--2583},
year = {2015},
publisher = {Curran Associates, Inc.},
url = {http://papers.nips.cc/paper/5666-variational-dropout-and-the-local-reparameterization-trick.pdf}
}

@article{louizos2017learning,
  title={Learning Sparse Neural Networks through $L_0$ Regularization},
  author={Louizos, Christos and Welling, Max and Kingma, Diederik P},
  journal={arXiv preprint arXiv:1712.01312},
  year={2017}
}

@inproceedings{casale2018gaussian,
  title={Gaussian Process Prior Variational Autoencoders},
  author={Casale, Francesco Paolo and Dalca, Adrian and Saglietti, Luca and Listgarten, Jennifer and Fusi, Nicolo},
  booktitle={Advances in Neural Information Processing Systems},
  pages={10390--10401},
  year={2018}
}

@article{jin2018junction,
  title={Junction Tree Variational Autoencoder for Molecular Graph Generation},
  author={Jin, Wengong and Barzilay, Regina and Jaakkola, Tommi},
  journal={arXiv preprint arXiv:1802.04364},
  year={2018}
}

@inproceedings{walker2016uncertain,
  title={An uncertain future: Forecasting from static images using variational autoencoders},
  author={Walker, Jacob and Doersch, Carl and Gupta, Abhinav and Hebert, Martial},
  booktitle={European Conference on Computer Vision},
  pages={835--851},
  year={2016},
  organization={Springer}
}

@article{rolfe2016discrete,
  title={Discrete variational autoencoders},
  author={Rolfe, Jason Tyler},
  journal={arXiv preprint arXiv:1609.02200},
  year={2016}
}

@article{chen2016variational,
  title={Variational lossy autoencoder},
  author={Chen, Xi and Kingma, Diederik P and Salimans, Tim and Duan, Yan and Dhariwal, Prafulla and Schulman, John and Sutskever, Ilya and Abbeel, Pieter},
  journal={arXiv preprint arXiv:1611.02731},
  year={2016}
}

@article{Goodfellow2012,
abstract = {We consider the problem of object recognition with a large number of classes. In order to overcome the low amount of labeled examples available in this setting, we introduce a new feature learning and extraction procedure based on a factor model we call spike-and-slab sparse coding (S3C). Prior work on S3C has not prioritized the ability to exploit parallel architectures and scale S3C to the enormous problem sizes needed for object recognition. We present a novel inference procedure appropriate for use with GPUs which allows us to dramatically increase both the training set size and the amount of latent factors that S3C may be trained with. We demonstrate that this approach improves upon the supervised learning capabilities of both sparse coding and the spike-and-slab Restricted Boltzmann Machine (ssRBM) on the CIFAR-10 dataset. We use the CIFAR-100 dataset to demonstrate that our method scales to large numbers of classes better than previous methods. Finally, we use our method to win the NIPS 2011 Workshop on Challenges In Learning Hierarchical Models' Transfer Learning Challenge.},
archivePrefix = {arXiv},
arxivId = {1206.6407},
author = {Goodfellow, Ian J. and Courville, Aaron and Bengio, Yoshua},
eprint = {1206.6407},
journal = {arXiv preprint arXiv:1206.6407},
month = {jun},
title = {{Large-Scale Feature Learning With Spike-and-Slab Sparse Coding}},
url = {https://arxiv.org/abs/1206.6407},
year = {2012}
}

@misc{tonolini2019variational,
title={Variational Sparse Coding},
author={Francesco Tonolini and Bjorn Sand Jensen and Roderick Murray-Smith},
year={2019},
url={https://openreview.net/forum?id=SkeJ6iR9Km}
}

@article{xiao2017fashion,
  title={{Fashion-MNIST}: a novel image dataset for benchmarking machine learning algorithms},
  author={Xiao, Han and Rasul, Kashif and Vollgraf, Roland},
  journal={arXiv preprint arXiv:1708.07747},
  year={2017}
}

@article{lecun1998gradient,
  title={Gradient-based learning applied to document recognition},
  author={LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
  journal={Proceedings of the IEEE},
  volume={86},
  number={11},
  pages={2278--2324},
  year={1998},
  publisher={IEEE}
}

@article{Chalk2016,
archivePrefix = {arXiv},
arxivId = {1605.07332},
author = {Chalk, Matthew and Marre, Olivier and Tkacik, Gasper},
eprint = {1605.07332},
month = {may},
title = {{Relevant sparse codes with variational information bottleneck}},
url = {https://arxiv.org/abs/1605.07332},
year = {2016}
}

@article{Rezende2014,
abstract = {We marry ideas from deep neural networks and approximate Bayesian inference to derive a generalised class of deep, directed generative models, endowed with a new algorithm for scalable inference and learning. Our algorithm introduces a recognition model to represent approximate posterior distributions, and that acts as a stochastic encoder of the data. We develop stochastic back-propagation -- rules for back-propagation through stochastic variables -- and use this to develop an algorithm that allows for joint optimisation of the parameters of both the generative and recognition model. We demonstrate on several real-world data sets that the model generates realistic samples, provides accurate imputations of missing data and is a useful tool for high-dimensional data visualisation.},
archivePrefix = {arXiv},
arxivId = {1401.4082},
author = {Rezende, Danilo Jimenez and Mohamed, Shakir and Wierstra, Daan},
eprint = {1401.4082},
month = {jan},
title = {{Stochastic Backpropagation and Approximate Inference in Deep Generative Models}},
url = {http://arxiv.org/abs/1401.4082},
year = {2014}
}

@article{Rougier:2018,
doi = {10.1038/d41586-018-04628-w},
year = {2018},