From 5f123d4fc1a5cca6fa5b4984880871a745225e54 Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 9 Aug 2017 14:39:13 +0200 Subject: [PATCH 1/9] fixed cifar10 import for python3 --- darch/datasets.py | 58 +++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/darch/datasets.py b/darch/datasets.py index 0b9a138..5bf3c1d 100644 --- a/darch/datasets.py +++ b/darch/datasets.py @@ -2,20 +2,23 @@ import numpy as np import scipy as sp import tensorflow as tf -import cPickle +try: + import cPickle +except ImportError: + import pickle as cPickle import os class InMemoryDataset: - """Wrapper around a dataset for iteration that allows cycling over the - dataset. + """Wrapper around a dataset for iteration that allows cycling over the + dataset. - This functionality is especially useful for training. One can specify if + This functionality is especially useful for training. One can specify if the data is to be shuffled at the end of each epoch. It is also possible to specify a transformation function to applied to the batch before being returned by next_batch. """ - + def __init__(self, X, y, shuffle_at_epoch_begin, batch_transform_fn=None): if X.shape[0] != y.shape[0]: assert ValueError("X and y the same number of examples.") @@ -30,10 +33,10 @@ def get_num_examples(self): return self.X.shape[0] def next_batch(self, batch_size): - """Returns the next batch in the dataset. + """Returns the next batch in the dataset. If there are fewer that batch_size examples until the end - of the epoch, next_batch returns only as many examples as there are + of the epoch, next_batch returns only as many examples as there are remaining in the epoch. """ @@ -70,11 +73,11 @@ def _extract_fn(x): X = x.images y = x.labels - if not normalize_range: + if not normalize_range: X *= 255.0 - + return (X, y) - + Xtrain, ytrain = _extract_fn(mnist.train) Xval, yval = _extract_fn(mnist.validation) Xtest, ytest = _extract_fn(mnist.test) @@ -85,9 +88,9 @@ def load_cifar10(data_dir, flatten=False, one_hot=True, normalize_range=False, whiten_pixels=True, border_pad_size=0): """Loads all of CIFAR-10 in a numpy array. - Provides a few options for the output formats. For example, + Provides a few options for the output formats. For example, normalize_range returns the output images with pixel values in [0.0, 1.0]. - The other options are self explanatory. Border padding corresponds to + The other options are self explanatory. Border padding corresponds to upsampling the image by zero padding the border of the image. """ @@ -95,26 +98,31 @@ def load_cifar10(data_dir, flatten=False, one_hot=True, normalize_range=False, val_filenames = ['data_batch_5'] test_filenames = ['test_batch'] - # NOTE: this function uses some arguments from the outer scope, namely + # NOTE: this function uses some arguments from the outer scope, namely # flatten, one_hot, normalize_range, and possibly others once added. 
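# A standalone sketch of the Python 2/3 unpickling pattern this patch
# introduces; the extracted-batch path below is an illustrative assumption,
# not something the repository defines:
try:
    import cPickle                      # Python 2
except ImportError:
    import pickle as cPickle            # Python 3

with open('cifar-10-batches-py/data_batch_1', 'rb') as f:
    try:
        d = cPickle.load(f)                           # Python 2 path
    except UnicodeDecodeError:
        f.seek(0)                                     # rewind before retrying
        d = cPickle.load(f, encoding='bytes')         # Python 3: keys are bytes
        d = {k.decode(): v for k, v in d.items()}     # normalize keys to str
assert d['data'].shape == (10000, 3072)               # 10000 images, 32*32*3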
def _load_data(fpath): - with open(fpath, 'rb') as f: - d = cPickle.load(f) + with open(fpath, 'rb') as f: + try: + d = cPickle.load(f) + except UnicodeDecodeError: + f.seek(0) + d = cPickle.load(f, encoding='bytes') + d = {k.decode(): v for k, v in d.items()} # change keys into strings # for the data X = d['data'].astype('float32') - # reshape the data to the format (num_images, height, width, depth) + # reshape the data to the format (num_images, height, width, depth) num_images = X.shape[0] num_classes = 10 X = X.reshape( (num_images, 3, 32, 32) ) X = X.transpose( (0,2,3,1) ) X = X.astype('float32') - + # transformations based on the argument options. if normalize_range: X = X / 255.0 - + if flatten: X = X.reshape( (num_images, -1) ) @@ -143,7 +151,7 @@ def _load_data_multiple_files(fname_list): y_full = np.concatenate(y_parts, axis=0) return (X_full, y_full) - + Xtrain, ytrain = _load_data_multiple_files(train_filenames) Xval, yval = _load_data_multiple_files(val_filenames) Xtest, ytest = _load_data_multiple_files(test_filenames) @@ -181,7 +189,7 @@ def center_crop(X, out_height, out_width): start_i = (in_height - out_height) / 2 start_j = (in_width - out_width) / 2 - out_X = X[:, start_i : start_i + out_height, start_j : start_j + out_width, :] + out_X = X[:, start_i : start_i + out_height, start_j : start_j + out_width, :] return out_X @@ -222,22 +230,22 @@ def per_image_whiten(X): X_flat = X.reshape((num_examples, -1)) X_mean = X_flat.mean(axis=1) X_cent = X_flat - X_mean[:, None] - X_norm = np.sqrt( np.sum( X_cent * X_cent, axis=1) ) + X_norm = np.sqrt( np.sum( X_cent * X_cent, axis=1) ) X_out = X_cent / X_norm[:, None] - X_out = X_out.reshape(X.shape) + X_out = X_out.reshape(X.shape) return X_out # Assumes the following ordering for X: (num_images, height, width, num_channels) def zero_pad_border(X, pad_size): n, height, width, num_channels = X.shape - X_padded = np.zeros((n, height + 2 * pad_size, width + 2 * pad_size, + X_padded = np.zeros((n, height + 2 * pad_size, width + 2 * pad_size, num_channels), dtype='float32') X_padded[:, pad_size:height + pad_size, pad_size:width + pad_size, :] = X - + return X_padded -# auxiliary functions for +# auxiliary functions for def get_augment_cifar_data_train(out_height, out_width, p_flip): def augment_fn(X, y): X_out = random_crop(X, out_height, out_width) From 25241603b813cfdee09a0ca06ff0db45189f3326 Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 9 Aug 2017 14:45:42 +0200 Subject: [PATCH 2/9] fixed import for py3 --- darch/modules.py | 208 +++++++++++++++++++++++------------------------ 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/darch/modules.py b/darch/modules.py index d27680a..329fa76 100644 --- a/darch/modules.py +++ b/darch/modules.py @@ -1,5 +1,5 @@ -import base +from . import base import numpy as np import tensorflow as tf import copy @@ -8,11 +8,11 @@ class BasicModule(object): """Basic search and compilation functionality for basic modules. - Basic modules are composed of parameters and hyperparameters. They are not - composite in the sense that do not have submodules as it is the case of - Concat and Or modules. This class is not meant to be used directly. - It has to be inherited and the functions that compute the output dimension - and compile the module to tensorflow has to be implemented for the specific + Basic modules are composed of parameters and hyperparameters. They are not + composite in the sense that do not have submodules as it is the case of + Concat and Or modules. 
This class is not meant to be used directly. + It has to be inherited and the functions that compute the output dimension + and compile the module to tensorflow has to be implemented for the specific module considered. See Affine and Dropout class definitions for examples of how to extend this class. @@ -36,9 +36,9 @@ def initialize(self, in_d, scope): self.in_d = in_d self.scope = scope - + # registers itself in the scope - prefix = self.__class__.__name__ + prefix = self.__class__.__name__ name = scope.get_valid_name(prefix) self.namespace_id = name scope.register_namespace(name) @@ -46,10 +46,10 @@ def initialize(self, in_d, scope): def get_outdim(self): raise NotImplemented - + def is_specified(self): - return (self.scope != None and - self.in_d != None and + return (self.scope != None and + self.in_d != None and len(self.chosen) == len(self.order)) def get_choices(self): @@ -71,10 +71,10 @@ def choose(self, choice_i): # for printing def repr_program(self): name = self.__class__.__name__ - return (name, ) + tuple(self.domains) + return (name, ) + tuple(self.domains) def repr_model(self): - name = self.__class__.__name__ + name = self.__class__.__name__ vals = [dm[i] for i, dm in zip(self.chosen, self.domains)] r = (name, ) + tuple(vals) return (r, ) @@ -86,7 +86,7 @@ def compile(self, in_x, train_feed, eval_feed): class Empty(BasicModule): """Empty Module. - Compiles to a wire. Directly passes the input to the output without + Compiles to a wire. Directly passes the input to the output without any transformation. """ @@ -101,7 +101,7 @@ class Dropout(BasicModule): """Dropout module. Dropout has different training behavior depending on whether the network - is being run on a training or on evaluation phase. Getting the desired + is being run on a training or on evaluation phase. Getting the desired behavior in each case is achieved through the use of scopes. """ @@ -115,12 +115,12 @@ def get_outdim(self): return self.in_d def compile(self, in_x, train_feed, eval_feed): - p_name = self.namespace_id + '_' + self.order[0] + p_name = self.namespace_id + '_' + self.order[0] p_var = tf.placeholder(tf.float32, name=p_name) - # during training the value of the dropout probability (keep_prob) is - # set to the actual chosen value. - # during evalution, it is set to 1.0. + # during training the value of the dropout probability (keep_prob) is + # set to the actual chosen value. + # during evalution, it is set to 1.0. p_val = self.domains[0][self.chosen[0]] train_feed[p_var] = p_val eval_feed[p_var] = 1.0 @@ -182,8 +182,8 @@ def compile(self, in_x, train_feed, eval_feed): W = tf.Variable( param_init_fn( [n, m] ) ) b = tf.Variable(tf.zeros([m])) - # if the number of input dimensions is larger than one, flatten the - # input and apply the affine transformation. + # if the number of input dimensions is larger than one, flatten the + # input and apply the affine transformation. 
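    # Concrete shape example: for CIFAR-10 input with in_d = (32, 32, 3),
    # n = 32 * 32 * 3 = 3072, so a batch of shape (batch_size, 32, 32, 3) is
    # reshaped to (batch_size, 3072) before the matmul with W of shape (3072, m).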
if len(self.in_d) > 1: in_x_flat = tf.reshape(in_x, shape=[-1, n]) out_y = tf.add(tf.matmul(in_x_flat, W), b) @@ -221,8 +221,8 @@ def initialize(self, in_d, scope): super(MaxPooling2D, self).initialize(in_d, scope) def get_outdim(self): - in_height, in_width, in_nchannels = self.in_d - window_len, stride, padding = [dom[i] + in_height, in_width, in_nchannels = self.in_d + window_len, stride, padding = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] out_height, out_width = compute_padded_dims( @@ -230,20 +230,20 @@ def get_outdim(self): out_d = (out_height, out_width, in_nchannels) return out_d - + def compile(self, in_x, train_feed, eval_feed): - in_height, in_width, in_nchannels = self.in_d - window_len, stride, padding = [dom[i] + in_height, in_width, in_nchannels = self.in_d + window_len, stride, padding = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] - out_y = tf.nn.max_pool( - in_x, ksize=[1, window_len, window_len, 1], + out_y = tf.nn.max_pool( + in_x, ksize=[1, window_len, window_len, 1], strides=[1, stride, stride, 1], padding=padding) return out_y # NOTE: for now, this is close to a replication of the max pooling layer. -# this may change later if we can capture most of the pooling layers in the +# this may change later if we can capture most of the pooling layers in the # same format. for now, an auxiliary function is provided. class AvgPooling2D(BasicModule): def __init__(self, window_lens, strides, paddings): @@ -260,8 +260,8 @@ def initialize(self, in_d, scope): super(AvgPooling2D, self).initialize(in_d, scope) def get_outdim(self): - in_height, in_width, in_nchannels = self.in_d - window_len, stride, padding = [dom[i] + in_height, in_width, in_nchannels = self.in_d + window_len, stride, padding = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] out_height, out_width = compute_padded_dims( @@ -269,27 +269,27 @@ def get_outdim(self): out_d = (out_height, out_width, in_nchannels) return out_d - + def compile(self, in_x, train_feed, eval_feed): - in_height, in_width, in_nchannels = self.in_d - window_len, stride, padding = [dom[i] + in_height, in_width, in_nchannels = self.in_d + window_len, stride, padding = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] - out_y = tf.nn.avg_pool( - in_x, ksize=[1, window_len, window_len, 1], + out_y = tf.nn.avg_pool( + in_x, ksize=[1, window_len, window_len, 1], strides=[1, stride, stride, 1], padding=padding) return out_y class Conv2D(BasicModule): - def __init__(self, filter_numbers, filter_lens, strides, paddings, + def __init__(self, filter_numbers, filter_lens, strides, paddings, param_init_fns): super(Conv2D, self).__init__() self.order.extend(["filter_number", "filter_len", "stride", "padding", "param_init_fn"]) - self.domains.extend([filter_numbers, filter_lens, strides, paddings, + self.domains.extend([filter_numbers, filter_lens, strides, paddings, param_init_fns]) # does additional error checking on the dimension. 
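The size arithmetic above is delegated to compute_padded_dims, which this diff
does not show. A hedged reconstruction, assuming the signature used at the call
sites and TensorFlow's SAME/VALID conventions (the repository's actual helper
may differ):

    import math

    def compute_padded_dims(in_height, in_width, window_len, stride, padding):
        # TensorFlow convention: SAME pads so that out = ceil(in / stride);
        # VALID uses only full windows: out = ceil((in - window + 1) / stride).
        if padding == 'SAME':
            return (int(math.ceil(float(in_height) / stride)),
                    int(math.ceil(float(in_width) / stride)))
        else:  # 'VALID'
            return (int(math.ceil(float(in_height - window_len + 1) / stride)),
                    int(math.ceil(float(in_width - window_len + 1) / stride)))

    # e.g. a 32x32 input with a 3x3 window and stride 2:
    # SAME -> (16, 16), VALID -> (15, 15)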
@@ -301,8 +301,8 @@ def initialize(self, in_d, scope): super(Conv2D, self).initialize(in_d, scope) def get_outdim(self): - in_height, in_width, in_nchannels = self.in_d - nfilters, filter_len, stride, padding, _ = [dom[i] + in_height, in_width, in_nchannels = self.in_d + nfilters, filter_len, stride, padding, _ = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] out_height, out_width = compute_padded_dims( @@ -310,13 +310,13 @@ def get_outdim(self): out_d = (out_height, out_width, nfilters) return out_d - + def compile(self, in_x, train_feed, eval_feed): - in_height, in_width, in_nchannels = self.in_d - nfilters, filter_len, stride, padding, param_init_fn = [dom[i] + in_height, in_width, in_nchannels = self.in_d + nfilters, filter_len, stride, padding, param_init_fn = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] - # Creation and initialization of the parameters. Should take size of + # Creation and initialization of the parameters. Should take size of # the filter into account. W = tf.Variable( param_init_fn( [filter_len, filter_len, in_nchannels, nfilters]) ) @@ -331,8 +331,8 @@ def compile(self, in_x, train_feed, eval_feed): return out_y class UserHyperparams(BasicModule): - """Used by the user to specify an additional set of hyperparameters that - the user also wants to search over. The behavior of the program with + """Used by the user to specify an additional set of hyperparameters that + the user also wants to search over. The behavior of the program with respect to these hyperparameters is determined by the user. """ def __init__(self, order, domains): @@ -349,17 +349,17 @@ def compile(self, in_x, train_feed, eval_feed): namespace["hyperp_names"] = self.order namespace["choices"] = tuple(self.chosen) namespace["hyperp_vals"] = [dom[i] for (dom, i) in zip(self.domains, self.chosen)] - + return in_x ### Auxiliary function for dimension and choice propagation. def propagate_seq(bs, i): """ Propagates choices in a sequence of modules. - If the module in the current position of the sequence is specified, we can - initialize the next module in the sequence (if there is any), and go to + If the module in the current position of the sequence is specified, we can + initialize the next module in the sequence (if there is any), and go to the next module in the chain if the initialized module becomes specified. - + """ while bs[i].is_specified(): @@ -376,24 +376,24 @@ def propagate_seq(bs, i): def propagate(b): """ Propagates choices in a module. - While the module is in a state where there is only one option available for - the next choice, we take that choice. This function leaves the module - specified or in a state where there multiple choices. This function will - typically be called by the submodule when initialize or choose is called + While the module is in a state where there is only one option available for + the next choice, we take that choice. This function leaves the module + specified or in a state where there multiple choices. This function will + typically be called by the submodule when initialize or choose is called on that module. """ while not b.is_specified() and len(b.get_choices()[1]) == 1: b.choose(0) -### Composite modules that take other modules as input. +### Composite modules that take other modules as input. 
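# To make the propagation contract concrete: whenever every remaining
# hyperparameter of a module admits exactly one value, propagate() commits
# those choices itself, so the searcher never sees them. A hedged sketch with
# a hypothetical basic module whose two domains are singletons:
#
#   b = SomeBasicModule(domain_a=[128], domain_b=['relu'])   # hypothetical
#   b.initialize(in_d, Scope())
#   propagate(b)              # both choices are forced, so both get taken
#   assert b.is_specified()   # no searcher interaction was needed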
class Concat: def __init__(self, bs): if len(bs) == 0: raise ValueError self.bs = bs - self.in_d = None + self.in_d = None self.scope = None def initialize(self, in_d, scope): @@ -406,7 +406,7 @@ def initialize(self, in_d, scope): def get_outdim(self): return self.bs[-1].get_outdim() - + def is_specified(self): return self.bs[-1].is_specified() @@ -420,12 +420,12 @@ def choose(self, choice_i): def repr_program(self): name = self.__class__.__name__ args = [b.repr_program() for b in self.bs] - return (name, ) + tuple(args) + return (name, ) + tuple(args) def repr_model(self): vals = [] for b in self.bs: - vals.extend(b.repr_model()) + vals.extend(b.repr_model()) return tuple(vals) def compile(self, in_x, train_feed, eval_feed): @@ -442,7 +442,7 @@ def __init__(self, bs): self.order = ["or_branch"] self.domains = [ range(len(bs)) ] self.bs = bs - self.in_d = None + self.in_d = None self.chosen = [] self.scope = None @@ -481,7 +481,7 @@ def choose(self, choice_i): def repr_program(self): name = self.__class__.__name__ args = [b.repr_program() for b in self.bs] - return (name, ) + tuple(args) + return (name, ) + tuple(args) def repr_model(self): vals = self.bs[self.chosen[0]].repr_model() @@ -494,14 +494,14 @@ def compile(self, in_x, train_feed, eval_feed): class Repeat: """Repeat module. - Takes a module as input and repeats it some number of times. The number of - repeats is itself an hyperparameter. Hyperparameters of each repeat are not - tied across repeats. See the RepeatTied module for tied hyperparameters + Takes a module as input and repeats it some number of times. The number of + repeats is itself an hyperparameter. Hyperparameters of each repeat are not + tied across repeats. See the RepeatTied module for tied hyperparameters across repeats. """ def __init__(self, b, ks): - if any([k < 1 for k in ks]): + if any([k < 1 for k in ks]): raise ValueError self.order = ["num_repeats"] @@ -509,7 +509,7 @@ def __init__(self, b, ks): self.b = b # used during search - self.in_d = None + self.in_d = None self.chosen = [] self.active_bs = None self.b_index = None @@ -521,7 +521,7 @@ def initialize(self, in_d, scope): def get_outdim(self): return self.active_bs[-1].get_outdim() - + def is_specified(self): if len(self.chosen) == 1: return self.active_bs[-1].is_specified() @@ -540,8 +540,8 @@ def choose(self, choice_i): k = self.domains[0][choice_i] self.b_index = 0 - self.active_bs = [copy.deepcopy(self.b) for _ in xrange(k)] - self.active_bs[0].initialize(self.in_d, self.scope) + self.active_bs = [copy.deepcopy(self.b) for _ in xrange(k)] + self.active_bs[0].initialize(self.in_d, self.scope) else: self.active_bs[self.b_index].choose(choice_i) @@ -556,7 +556,7 @@ def repr_program(self): def repr_model(self): vals = [] for b in self.active_bs: - vals.extend(b.repr_model()) + vals.extend(b.repr_model()) return tuple(vals) def compile(self, in_x, train_feed, eval_feed): @@ -572,10 +572,10 @@ class RepeatTied: Like a Repeat module, but now the hyperparameters are tied across repeats. Note that only the hyperparameters, not the parameters, are tied. Tying parameters is done through other route. 
- + """ def __init__(self, b, ks): - if any([k < 1 for k in ks]): + if any([k < 1 for k in ks]): raise ValueError self.order = ["num_repeats"] @@ -583,7 +583,7 @@ def __init__(self, b, ks): self.b = b # used during search - self.in_d = None + self.in_d = None self.chosen = [] self.active_bs = None self.b_index = None @@ -597,7 +597,7 @@ def initialize(self, in_d, scope): def get_outdim(self): return self.active_bs[-1].get_outdim() - + def is_specified(self): if len(self.chosen) == 1: return self.active_bs[-1].is_specified() @@ -616,24 +616,24 @@ def choose(self, choice_i): k = self.domains[0][choice_i] self.b_index = 0 - self.active_bs = [copy.deepcopy(self.b) for _ in xrange(k)] - self.active_bs[0].initialize(self.in_d, self.scope) + self.active_bs = [copy.deepcopy(self.b) for _ in xrange(k)] + self.active_bs[0].initialize(self.in_d, self.scope) else: self.active_bs[self.b_index].choose(choice_i) self.b0_choose_hist.append(choice_i) self.b_index = propagate_seq(self.active_bs, self.b_index) - # as soon as the pointer moves to 1, set all others by copying the + # as soon as the pointer moves to 1, set all others by copying the # choices. if self.b_index == 1: for b in self.active_bs[1:]: for ch in self.b0_choose_hist: b.choose(ch) - + # this should advance the pointer by one. self.b_index = propagate_seq(self.active_bs, self.b_index) - + # it should be specified at the end of this. assert self.is_specified() @@ -646,7 +646,7 @@ def repr_program(self): def repr_model(self): vals = [] for b in self.active_bs: - vals.extend(b.repr_model()) + vals.extend(b.repr_model()) return tuple(vals) def compile(self, in_x, train_feed, eval_feed): @@ -658,23 +658,23 @@ def compile(self, in_x, train_feed, eval_feed): class Residual: """ Residual skip connection. - Introduces a skip connection between the module passed as argument. + Introduces a skip connection between the module passed as argument. The module taken as argument can have hyperparameters to be specified. If the input and output do not have the same dimensions, padding needs to be done for the results to be combined in a sum or product. We briefly discuss the different cases: - (1)| both input and output have the same number of dimensions and the same - sizes for paired dimensions. + (1)| both input and output have the same number of dimensions and the same + sizes for paired dimensions. => simply do the entrywise operation without doing any changes. - (2)| both input and output have the same number of dimensions, but they + (2)| both input and output have the same number of dimensions, but they have different sizes for paired dimensions. - => pad the smallest dimensions on either input or output such that the - result after padding can be combined; both input and output can be changed + => pad the smallest dimensions on either input or output such that the + result after padding can be combined; both input and output can be changed in this case if none strictly dominates the other in terms of dimensions. (3)| input and ouput have different number of dimensions. - => perhaps the most straightforward solution is to flatten both input and - output and combine the flattened versions. another possibility is to add + => perhaps the most straightforward solution is to flatten both input and + output and combine the flattened versions. another possibility is to add extra dimensions and pad the smallest dimensions with zeros. The most straightforward solutions have been implemented for now. 
@@ -683,7 +683,7 @@ class Residual: def __init__(self, b): self.b = b - self.in_d = None + self.in_d = None self.scope = None def initialize(self, in_d, scope): @@ -704,13 +704,13 @@ def get_outdim(self): [max(od_i, id_i) for (od_i, id_i) in zip(out_d_b, in_d)]) else: - # flattens both input and output. + # flattens both input and output. out_d_b_flat = np.product(out_d_b) in_d_flat = np.product(in_d) out_d = (max(out_d_b_flat, in_d_flat) ,) return out_d - + def is_specified(self): return self.b.is_specified() @@ -727,7 +727,7 @@ def repr_program(self): return (name, args) def repr_model(self): - name = self.__class__.__name__ + name = self.__class__.__name__ b = self.b.repr_model() r = (name, b[0]) @@ -737,7 +737,7 @@ def compile(self, in_x, train_feed, eval_feed): # NOTE: this function requires that target dims dominate (bigger or # equal component wise) in_dims. this is the case in how it is used # currently in the code below. - compute_padding_fn = lambda in_dims, out_dims: [ [0, max(0, od_i - id_i)] + compute_padding_fn = lambda in_dims, out_dims: [ [0, max(0, od_i - id_i)] for (id_i, od_i) in zip(in_dims, out_dims) ] out_d_b = self.b.get_outdim() @@ -758,10 +758,10 @@ def compile(self, in_x, train_feed, eval_feed): # computing the padding for both b and the input. # NOTE: adds no padding to the data dimension (i.e., the initial [0, 0]) paddings_b = [[0, 0]] + compute_padding_fn(out_d_b, out_d) - out_y_b_padded = tf.pad(out_y_b, paddings_b, "CONSTANT") + out_y_b_padded = tf.pad(out_y_b, paddings_b, "CONSTANT") paddings_in = [[0, 0]] + compute_padding_fn(in_d, out_d) - in_x_padded = tf.pad(in_x, paddings_in, "CONSTANT") + in_x_padded = tf.pad(in_x, paddings_in, "CONSTANT") # finally combine the results with the appropriate dimensions. out_y = out_y_b_padded + in_x_padded @@ -785,8 +785,8 @@ def compile(self, in_x, train_feed, eval_feed): # it is kind of like the previous one. class ChoiceBisection: - """Does bissection on all the hyperparameters of module taken as argument. - This can be useful to increase sharing between hyperparameter values in + """Does bissection on all the hyperparameters of module taken as argument. + This can be useful to increase sharing between hyperparameter values in approaches such as MCTS. """ def __init__(self, b): @@ -798,7 +798,7 @@ def __init__(self, b): self.cur_name = None self.cur_vals = None # hist is kept to create the create a name for the binary choice with - # a suffix that corresponds to the sequence of binary decisions done + # a suffix that corresponds to the sequence of binary decisions done # so far. 
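    # Worked example of the bisection implemented in choose() below: a domain
    # of 8 values is resolved through three binary decisions. Choosing 1, then
    # 0, then 0 narrows [0, 8) -> [4, 8) -> [4, 6) -> [4, 5), i.e. index 4.
    # Exposing only binary decisions lets tree searchers such as MCTS share
    # value statistics across models that agree on a prefix of decisions.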
def initialize(self, in_d, scope): @@ -835,8 +835,8 @@ def choose(self, choice_i): assert self.is_bisecting and choice_i == 0 or choice_i == 1 # doing the bissection - mid = int( (self.left + self.right) / 2.0 ) - if choice_i == 0: + mid = int( (self.left + self.right) / 2.0 ) + if choice_i == 0: self.right = mid else: self.left = mid @@ -869,7 +869,7 @@ def Optional_fn(b): return Or([Empty(), b]) def Nonlinearity_fn(nonlin_types): - bs = [] + bs = [] for t in nonlin_types: if t == "relu": @@ -886,7 +886,7 @@ def Nonlinearity_fn(nonlin_types): return Or(bs) def Pooling2D_fn(pooling_types, window_lens, strides, paddings): - bs = [] + bs = [] for t in pooling_types: if t == "max": @@ -901,12 +901,12 @@ def Pooling2D_fn(pooling_types, window_lens, strides, paddings): return Or(bs) def MaybeSwap_fn(b1, b2): - """Builds a module that has a parameter to swapping the order of modules + """Builds a module that has a parameter to swapping the order of modules passed as argument. """ b = Or([ - Concat([b1, b2]), + Concat([b1, b2]), Concat([b2, b1]) ]) return b From 99ae19555b1057e4207dccfba7e7f91d7dd4c33e Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 9 Aug 2017 14:49:15 +0200 Subject: [PATCH 3/9] fixed print for py3 --- darch/searchers.py | 48 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/darch/searchers.py b/darch/searchers.py index ea836d1..c2caa38 100644 --- a/darch/searchers.py +++ b/darch/searchers.py @@ -1,4 +1,4 @@ - +from __future__ import print_function import numpy as np import scipy.sparse as sp import sklearn.linear_model as lm @@ -11,23 +11,23 @@ def evaluate_and_print(evaluator, model, if output_to_terminal: pprint( model.repr_model() , width=40, indent=2) - print - + print() + if ignore_invalid_models: - try: + try: sc = evaluator.eval_model(model) except ValueError: if output_to_terminal: - print "Invalid Model!" + print("Invalid Model!") return None else: - sc = evaluator.eval_model(model) - + sc = evaluator.eval_model(model) + return sc # save the history on the model. will be useful for cached evaluation. 
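# A hedged usage sketch of the drivers in this file; the RandomSearcher
# constructor arguments are assumed from how sample_models uses them, and
# evaluator can be any object exposing eval_model(model):
#
#   searcher = RandomSearcher(b_search, in_d)        # assumed signature
#   scores, hists = run_random_searcher(evaluator, searcher, num_models=10,
#                                       output_to_terminal=True,
#                                       ignore_invalid_models=True)
#
# With ignore_invalid_models=True, models whose dimensions fail to compile
# raise ValueError and are skipped instead of aborting the run.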
-def maybe_register_choice_hist(model, hist, save_hist_in_model): +def maybe_register_choice_hist(model, hist, save_hist_in_model): if save_hist_in_model: assert not hasattr(model, 'choice_hist') model.choice_hist = hist @@ -76,12 +76,12 @@ def run_enumeration_searcher(evaluator, searcher, hists = [] for (mdl, h) in searcher.enumerate_models(): - sc = evaluate_and_print(evaluator, model, + sc = evaluate_and_print(evaluator, model, output_to_terminal, ignore_invalid_models) if sc != None: scores.append(sc) hists.append(h) - + return (scores, hists) class RandomSearcher: @@ -112,7 +112,7 @@ def sample_models(self, nsamples): return (samples, choice_hists) -def run_random_searcher(evaluator, searcher, num_models, +def run_random_searcher(evaluator, searcher, num_models, output_to_terminal=False, ignore_invalid_models=False, save_hist_in_model=False): @@ -126,7 +126,7 @@ def run_random_searcher(evaluator, searcher, num_models, hist = tuple(hists[0]) maybe_register_choice_hist(mdl, hist, save_hist_in_model) - sc = evaluate_and_print(evaluator, mdl, + sc = evaluate_and_print(evaluator, mdl, output_to_terminal, ignore_invalid_models) if sc != None: srch_choice_hists.append(hist) @@ -274,7 +274,7 @@ def forget_epoch(self, epoch_i): self.histories.pop(epoch_i) def tell_observed_scores(self, epoch_i, sample_inds, scores): - """Update the state of the searcher based on the actual scores of the + """Update the state of the searcher based on the actual scores of the models proposed. """ @@ -333,7 +333,7 @@ def run_smbo_searcher(evaluator, searcher, maybe_register_choice_hist(mdl, hist, save_hist_in_model) #sc = evaluator.eval_model(mdl) - sc = evaluate_and_print(evaluator, mdl, + sc = evaluate_and_print(evaluator, mdl, output_to_terminal, ignore_invalid_models) if sc != None: ep_model_inds.append(i) @@ -354,11 +354,11 @@ def run_smbo_searcher(evaluator, searcher, (epoch_i, models, choice_hists, pred_scores) = \ searcher.sample_new_epoch(nsamples_epoch) - # if it is an exploration episode, shuffle the order given by the + # if it is an exploration episode, shuffle the order given by the model_ordering = range(len(models)) if np.random.rand() < explore_prob: np.random.shuffle(model_ordering) - + # goes through the models in the order specified. for mdl_i in model_ordering: mdl = models[mdl_i] @@ -366,7 +366,7 @@ def run_smbo_searcher(evaluator, searcher, maybe_register_choice_hist(mdl, hist, save_hist_in_model) #sc = evaluator.eval_model(mdl) - sc = evaluate_and_print(evaluator, mdl, + sc = evaluate_and_print(evaluator, mdl, output_to_terminal, ignore_invalid_models) if sc != None: searcher.tell_observed_scores(epoch_i, [mdl_i], [sc]) @@ -380,7 +380,7 @@ def run_smbo_searcher(evaluator, searcher, # which is not desirable if the searcher already had information there. # this will be kept for now. # a fix would be to count the models evaluated and only return those. - # NOTE: COMEBACK as it is now, it only returns the information about the + # NOTE: COMEBACK as it is now, it only returns the information about the # the models that were evaluated in in this turn. srch_scores = searcher.known_scores[-num_evals:] srch_choice_hists = searcher.known_hists[-num_evals:] @@ -483,7 +483,7 @@ def update_stats(self, score): def best_child(self, exploration_bonus): assert not self.is_leaf() - # if two nodes have the same score. + # if two nodes have the same score. 
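        # The selection rule below is standard UCB1: an exploitation term
        # (mean score so far) plus an exploration bonus,
        #   score_i = sum_scores_i / n_i + c * sqrt(2 * ln(N_parent) / n_i),
        # with unvisited children scored +inf so each is tried at least once.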
best_inds = None best_score = -np.inf @@ -491,23 +491,23 @@ def best_child(self, exploration_bonus): for (i, node) in enumerate(self.children): # NOTE: potentially, do a different definition for the scores. # especially once the surrogate model is introduced. - # selection policy may be somewhat biased towards what the + # selection policy may be somewhat biased towards what the # rollout policy based on surrogate functions says. # think about how to extend this. if node.num_trials > 0: - score = ( node.sum_scores / node.num_trials + + score = ( node.sum_scores / node.num_trials + exploration_bonus * np.sqrt( 2.0 * parent_log_nt / node.num_trials) ) else: score = np.inf - + # keep the best node. if score > best_score: best_inds = [i] best_score = score elif score == best_score: best_inds.append(i) - + # draw a child at random and expand. best_i = np.random.choice(best_inds) best_child = self.children[best_i] @@ -535,7 +535,7 @@ def run_mcts_searcher(evaluator, searcher, num_models, maybe_register_choice_hist(mdl, cache_hist, save_hist_in_model) # evaluation of the model. - sc = evaluate_and_print(evaluator, mdl, + sc = evaluate_and_print(evaluator, mdl, output_to_terminal, ignore_invalid_models) if sc != None: #sc = np.random.random() ### come back here. From 10ed86dc50f7d2130c1ad53649b0190b7ca16f6d Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 9 Aug 2017 15:09:34 +0200 Subject: [PATCH 4/9] replaced xrange by range (py3 compatible) --- darch/datasets.py | 2 +- darch/evaluators.py | 4 ++-- darch/searchers.py | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/darch/datasets.py b/darch/datasets.py index 5bf3c1d..6686e12 100644 --- a/darch/datasets.py +++ b/darch/datasets.py @@ -202,7 +202,7 @@ def random_crop(X, out_height, out_width): start_is = np.random.randint(in_height - out_height + 1, size=num_examples) start_js = np.random.randint(in_width - out_width + 1, size=num_examples) out_X = [] - for ind in xrange(num_examples): + for ind in range(num_examples): st_i = start_is[ind] st_j = start_js[ind] diff --git a/darch/evaluators.py b/darch/evaluators.py index 7fc4086..c57f1c4 100644 --- a/darch/evaluators.py +++ b/darch/evaluators.py @@ -128,11 +128,11 @@ def compute_accuracy(dataset, ev_feed, ev_batch_size): val_num_examples = self.val_dataset.get_num_examples() # Training cycle - for epoch in xrange(self.training_epochs): + for epoch in range(self.training_epochs): avg_cost = 0. total_batch = int(train_num_examples / batch_size) # Loop over all batches - for i in xrange(total_batch): + for i in range(total_batch): batch_x, batch_y = self.train_dataset.next_batch(batch_size) #print((batch_x.shape, batch_y.shape)) #import ipdb; ipdb.set_trace() diff --git a/darch/searchers.py b/darch/searchers.py index c2caa38..41189da 100644 --- a/darch/searchers.py +++ b/darch/searchers.py @@ -51,7 +51,7 @@ def _enumerate_models_iter(self, b, choice_hist): name, vals = b.get_choices() # recurse on the enumeration for each of the possible choices. 
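        # Note on the xrange -> range substitution applied throughout this
        # patch: Python 3's range is a lazy sequence like Python 2's xrange,
        # so memory behavior is preserved; only call sites needing a real
        # list (none here) would have to wrap it as list(range(n)).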
- for choice_i in xrange(len(vals)): + for choice_i in range(len(vals)): bi = copy.deepcopy(b) bi.choose(choice_i) for (bk, choice_histk) in self._enumerate_models_iter( @@ -94,7 +94,7 @@ def sample_models(self, nsamples): samples = [] choice_hists = [] - for _ in xrange(nsamples): + for _ in range(nsamples): bk = copy.deepcopy(b) bk.initialize(self.in_d, Scope()) hist = [] @@ -161,8 +161,8 @@ def _build_feature_maps(self, bs, maxlen, thres): # filtering out empty modules in the sequence bls = [b[0] for b in tuple(b.repr_model()) if b[0] != "Empty"] - for k in xrange(1, maxlen): - for i in xrange(len(bls) - k): + for k in range(1, maxlen): + for i in range(len(bls) - k): ngram = tuple(bls[i:i + k]) if ngram not in ngram_to_count: @@ -191,8 +191,8 @@ def _compute_features(self, model): feats[0, 0] = len(bls) # ngrams features - for k in xrange(1, self.ngram_maxlen): - for i in xrange(len(bls) - k): + for k in range(1, self.ngram_maxlen): + for i in range(len(bls) - k): ngram = tuple(bls[i:i + k]) if ngram in self.module_ngram_to_id: @@ -232,7 +232,7 @@ def sample_new_epoch(self, nsamples): scores = [] choice_hists = [] - for _ in xrange(nsamples): + for _ in range(nsamples): bk = copy.deepcopy(self.b_search) bk.initialize(self.in_d, Scope()) hist = [] @@ -327,7 +327,7 @@ def run_smbo_searcher(evaluator, searcher, num_evals = 0 ep_model_inds = [] ep_true_scores = [] - for i in xrange(len(models)): + for i in range(len(models)): mdl = models[i] hist = choice_hists[i] maybe_register_choice_hist(mdl, hist, save_hist_in_model) @@ -347,7 +347,7 @@ def run_smbo_searcher(evaluator, searcher, # compute the string representations from which the features are going to # be derived. evaluated_models = {b.repr_model() for b in models} - for i in xrange(nsamples_after): + for i in range(nsamples_after): if i % refit_interval == 0: searcher.refit_model() @@ -406,7 +406,7 @@ def sample_models(self, num_samples): models = [] choice_hists = [] - for _ in xrange(num_samples): + for _ in range(num_samples): # initialization of the model. bk = copy.deepcopy(self.b_search) bk.initialize(self.in_d, Scope()) @@ -516,7 +516,7 @@ def best_child(self, exploration_bonus): # expands a node creating all the placeholders for the children. def expand(self, num_children): - self.children = [MCTSTreeNode(self) for _ in xrange(num_children)] + self.children = [MCTSTreeNode(self) for _ in range(num_children)] # NOTE: if the search space has holes, it break. needs try/except module. def run_mcts_searcher(evaluator, searcher, num_models, @@ -526,7 +526,7 @@ def run_mcts_searcher(evaluator, searcher, num_models, srch_choice_hists = [] srch_scores = [] - for _ in xrange(num_models): + for _ in range(num_models): (models, hists) = searcher.sample_models(1) mdl = models[0] # has to join the tree and rollout histories to make a normal history. 
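The change from / to // in the patch below matters because Python 3's /
performs true division even on integers, so (in_height - out_height) / 2
yields a float that can no longer be used as an array index. A minimal
illustration:

    import numpy as np

    # Python 2: 5 / 2 == 2 (floor); Python 3: 5 / 2 == 2.5 (true division)
    start_i = (32 - 24) // 2                       # 4 under both versions
    X = np.zeros((1, 32, 32, 3), dtype='float32')
    crop = X[:, start_i:start_i + 24, start_i:start_i + 24, :]
    assert crop.shape == (1, 24, 24, 3)            # a float start would raise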
From cd3f58f24fa127faf96655e0b004b3b45a83ebc1 Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 9 Aug 2017 15:13:47 +0200 Subject: [PATCH 5/9] explicit integer division (py3 compatibility) --- darch/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/darch/datasets.py b/darch/datasets.py index 6686e12..7558d29 100644 --- a/darch/datasets.py +++ b/darch/datasets.py @@ -187,8 +187,8 @@ def center_crop(X, out_height, out_width): num_examples, in_height, in_width, in_depth = X.shape assert out_height <= in_height and out_width <= in_width - start_i = (in_height - out_height) / 2 - start_j = (in_width - out_width) / 2 + start_i = (in_height - out_height) // 2 + start_j = (in_width - out_width) // 2 out_X = X[:, start_i : start_i + out_height, start_j : start_j + out_width, :] return out_X From de0794d8df767de601e24b7ee5b486733deb3468 Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 9 Aug 2017 15:23:12 +0200 Subject: [PATCH 6/9] added hidden files and data to gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7e99e36..72eb439 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -*.pyc \ No newline at end of file +*.pyc +data/ +.* +!/.gitignore From 9434712ebb01ae35cea0797252d191364a494c0c Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Thu, 10 Aug 2017 12:56:35 +0200 Subject: [PATCH 7/9] added setup.py --- setup.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..db00faa --- /dev/null +++ b/setup.py @@ -0,0 +1,67 @@ +from setuptools import setup, find_packages +from codecs import open +from os import path + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='darch', + + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version='0.1.0', + + description='Deep Architect', + long_description=long_description, + + # The project's main homepage. + url='https://github.com/negrinho/deep_architect', + + # Author details + author='The Python Packaging Authority', + author_email='pypa-dev@googlegroups.com', + + # See https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 3 - Alpha', + + # Indicate who your project is intended for + 'Intended Audience :: Developers', + + # Specify the Python versions you support here. In particular, ensure + # that you indicate whether you support Python 2, Python 3 or both. + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + ], + + # What does your project relate to? + keywords='deep architect', + + # You can just specify the packages manually here if your project is + # simple. Or you can use find_packages(). + packages=['darch'], + + # List run-time dependencies here. These will be installed by pip when + # your project is installed. 
For an analysis of "install_requires" vs pip's + # requirements files see: + # https://packaging.python.org/en/latest/requirements.html + install_requires=[ + 'numpy', + 'scipy', + 'tensorflow', + 'sklearn' + ] +) From 57af4c5288ac92c922a96607f505dc14421bd52d Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Thu, 10 Aug 2017 13:06:27 +0200 Subject: [PATCH 8/9] removed tf from requirements --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index db00faa..64f5bfb 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,6 @@ install_requires=[ 'numpy', 'scipy', - 'tensorflow', 'sklearn' ] ) From 59b0db97de13790786a26d5c28514df2f16ad851 Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Thu, 10 Aug 2017 13:13:34 +0200 Subject: [PATCH 9/9] typo --- darch/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/darch/datasets.py b/darch/datasets.py index 7558d29..43d76b9 100644 --- a/darch/datasets.py +++ b/darch/datasets.py @@ -177,9 +177,9 @@ def onehot_to_idx(y_onehot): return y_idx def idx_to_onehot(y_idx, num_classes): - num_images = y.shape[0] + num_images = y_idx.shape[0] y_one_hot = np.zeros( (num_images, num_classes), dtype='float32') - y_one_hot[ np.arange(num_images), y ] = 1.0 + y_one_hot[ np.arange(num_images), y_idx ] = 1.0 return y_one_hot
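With the variable names corrected, idx_to_onehot and onehot_to_idx form a
round trip; a quick sanity check, assuming onehot_to_idx is the argmax-based
inverse defined alongside it:

    import numpy as np

    y_idx = np.array([3, 0, 9])
    y_onehot = idx_to_onehot(y_idx, num_classes=10)    # shape (3, 10), float32
    assert y_onehot[0, 3] == 1.0 and y_onehot.sum() == 3.0
    assert np.array_equal(onehot_to_idx(y_onehot), y_idx)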