
Commit f2965e4

Merge pull request #79 from holgerteichgraeber/32bit
32bit ready
2 parents c8d5944 + a848a39 commit f2965e4

12 files changed: +88 −87 lines


Project.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ keywords = ["clustering", "JuMP", "optimization"]
 license = "MIT"
 desc = "julia implementation of using different clustering methods for finding representative periods for the optimization of energy systems"
 author = ["Holger Teichgraeber"]
-version = "0.3.2"
+version = "0.3.3"

 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"

docs/src/index.md

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@
 [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://holgerteichgraeber.github.io/ClustForOpt.jl/dev)
 [![Build Status](https://travis-ci.com/holgerteichgraeber/ClustForOpt.jl.svg?token=HRFemjSxM1NBCsbHGNDG&branch=master)](https://travis-ci.com/holgerteichgraeber/ClustForOpt.jl)

-[ClustForOpt](https://github.com/holgerteichgraeber/ClustForOpt.jl) is a [julia](https://www.juliaopt.com) implementation of clustering methods for finding representative periods for optimization problems. A utilization in a scalable capacity expansion problem can be found in the package [CEP](https://github.com/YoungFaithful/CEP.jl).
+[ClustForOpt](https://github.com/holgerteichgraeber/ClustForOpt.jl) is a [julia](https://www.juliaopt.com) implementation of clustering methods for finding representative periods for optimization problems. A utilization in a scalable capacity expansion problem can be found in the package [CEP](https://github.com/YoungFaithful/CapacityExpansion.jl).

 The package has two main purposes: 1) Provide a simple process of clustering time-series input data, with clustered data output in a generalized type system 2) provide an interface between clustered data and optimization problem.

@@ -33,6 +33,6 @@ Install using:

 ```julia
 ]
-add https://github.com/holgerteichgraeber/ClustForOpt.jl.git
+add ClustForOpt
 ```
 where `]` opens the julia package manager.
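For orientation, a minimal end-to-end sketch of the two purposes named in the diff above: load time-series data into the package's type system, then cluster it into representative periods. The `data_path` value is hypothetical, and the keyword values simply echo the `run_clust` docstring further down in this commit; this is a sketch, not code from the repository.

```julia
using ClustForOpt

# Hypothetical folder of CSV time series; the load call mirrors examples/workflow_introduction.jl.
data_path = "data"
ts_input_data = load_timeseries_data(data_path; T=24, years=[2016])

# Aggregate to 5 representative periods; keywords follow the run_clust docstring in this commit.
clust_result = run_clust(ts_input_data; method="kmeans", representation="centroid", n_clust=5)
```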

examples/workflow_introduction.jl

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ ts_input_data = load_timeseries_data(data_path; T=24, years=[2016])

 #= ClustData
 How the struct is setup:
-ClustData{region::String,K::Int,T::Int,data::Dict{String,Array},weights::Array{Float64},mean::Dict{String,Array},sdv::Dict{String,Array}} <: TSData
+ClustData{region::String,K::Int,T::Int,data::Dict{String,Array},weights::Array{AbstractFloat},mean::Dict{String,Array},sdv::Dict{String,Array}} <: TSData
 -region: specifies region data belongs to
 -K: number of periods
 -T: time steps per period
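Continuing the sketch above, the fields listed in this struct comment can be inspected directly; only the field names documented here are assumed, the printed sizes depend on whatever data was loaded.

```julia
# Inspect the ClustData fields documented above: K periods of T time steps per attribute.
println("periods K = ", ts_input_data.K, ", time steps per period T = ", ts_input_data.T)
for (name, arr) in ts_input_data.data      # data::Dict{String,Array}, keyed by attribute name
    println(name, " => array of size ", size(arr))
end
```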

src/clustering/attribute_weighting.jl

Lines changed: 2 additions & 2 deletions
@@ -1,10 +1,10 @@
 """
-function attribute_weighting(data::ClustData,attribute_weights::Dict{String,Float64})
+function attribute_weighting(data::ClustData,attribute_weights::Dict{String,AbstractFloat})

 apply the different attribute weights based on the dictionary entry for each tech or exact name
 """
 function attribute_weighting(data::ClustData,
-attribute_weights::Dict{String,Float64}
+attribute_weights::Dict{String,AbstractFloat}
 )
 for name in keys(data.data)
 tech=split(name,"-")[1]

src/clustering/exact_kmedoids.jl

Lines changed: 4 additions & 4 deletions
@@ -2,15 +2,15 @@

 "Holds results of kmedoids run"
 mutable struct kmedoidsResult
-medoids::Array{Float64}
+medoids::Array{AbstractFloat}
 assignments::Array{Int}
-totalcost::Float64
+totalcost::AbstractFloat
 end


 """
 kmedoids_exact(
-data::Array{Float64},
+data::Array{AbstractFloat},
 nclust::Int,
 _dist::SemiMetric = SqEuclidean(),
 env::Any;
@@ -21,7 +21,7 @@ Performs the exact kmedoids algorithm as in Kotzur et al, 2017
 optimizer=Gurobi.Optimizer
 """
 function kmedoids_exact(
-data::Array{Float64},
+data::Array{AbstractFloat},
 nclust::Int,
 optimizer::DataType;
 _dist::SemiMetric = SqEuclidean(),
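A sketch of calling the exact formulation with a MILP solver, following the `optimizer=Gurobi.Optimizer` hint in the docstring. The data matrix and its layout (one column per candidate period) are made-up assumptions, and the element type is widened to `AbstractFloat` so the call matches the signature as annotated in this commit.

```julia
using Gurobi

# Hypothetical data matrix, widened to match the Array{AbstractFloat} annotation above.
data = Array{AbstractFloat}(rand(24, 100))   # assumed: 24 time steps x 100 candidate periods
result = kmedoids_exact(data, 5, Gurobi.Optimizer)   # nclust = 5 representative periods
```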

src/clustering/extreme_vals.jl

Lines changed: 3 additions & 3 deletions
@@ -67,7 +67,7 @@ function simple_extr_val_ident(data::ClustData,
 end

 """
-simple_extr_val_ident(data::Array{Float64};extremum="max",peak_def="absolute")
+simple_extr_val_ident(data::Array{AbstractFloat};extremum="max",peak_def="absolute")
 identifies a single simple extreme period from the data and returns column index of extreme period
 - `data_type`: any attribute from the attributes contained within *data*
 - `extremum`: "min" or "max"
@@ -78,7 +78,7 @@ function simple_extr_val_ident(clust_data::ClustData,
 data_type::String;
 extremum::String="max",
 peak_def::String="absolute",
-consecutive_periods::Int64=1)
+consecutive_periods::Int=1)
 data=clust_data.data[data_type]
 delta_period=consecutive_periods-1
 # set data to be compared
@@ -168,7 +168,7 @@ function extreme_val_output(data::ClustData,
 end
 weights_ed=[]
 #initiate new k-ids-ed that don't represent any original time-period
-k_ids_ed=zeros(Int64,size(data.k_ids))
+k_ids_ed=zeros(Int,size(data.k_ids))
 if rep_mod_method == "feasibility"
 weights_ed = zeros(length(unique_extr_val_idcs))
 #no representation is done of the original time-period, it's just for feasibility
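The Int64 → Int substitutions here (and throughout this commit) are what make the package "32bit ready": `Int` is an alias for the platform's native integer type, so annotations and constructors that hard-code `Int64` fail to match the `Int32` values produced on a 32-bit Julia build. A small illustration of the distinction; the variable name below is just borrowed from the diff:

```julia
# `Int` follows the platform word size: Int64 on 64-bit builds, Int32 on 32-bit builds.
@assert Int === (Sys.WORD_SIZE == 64 ? Int64 : Int32)

# zeros(Int, ...) therefore yields the native integer type on either platform,
# whereas zeros(Int64, ...) would pin the element type regardless of platform.
k_ids_ed = zeros(Int, 10)
@assert eltype(k_ids_ed) === Int
```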

src/clustering/intraperiod_segmentation.jl

Lines changed: 10 additions & 10 deletions
@@ -28,17 +28,17 @@ function intraperiod_segmentation(data_merged::ClustDataMerged;
 end

 """
-run_clust_segmentation(period::Array{Float64,2};n_seg::Int=24,iterations::Int=300,norm_scope::String="full")
+run_clust_segmentation(period::Array{AbstractFloat,2};n_seg::Int=24,iterations::Int=300,norm_scope::String="full")
 !!! Not yet proven implementation of segmentation introduced by Bahl et al. 2018
 """
-function run_clust_segmentation(period::Array{Float64,2};
+function run_clust_segmentation(period::Array{AbstractFloat,2};
 n_seg::Int=24,
 iterations::Int=300,
 norm_scope::String="full")
 norm_period, typely_mean, typely_sdv=z_normalize(period;scope=norm_scope)
 #x,weights,clustids,x,iter= run_clust_hierarchical(norm_period,n_seg,iterations)
 data=norm_period
-clustids=run_clust_hierarchical_partitional(data::Array, n_seg::Int64)
+clustids=run_clust_hierarchical_partitional(data::Array, n_seg::Int)
 weights = calc_weights(clustids,n_seg)


@@ -48,7 +48,7 @@ function run_clust_segmentation(period::Array{Float64,2};
 return centers,weights,clustids,cost,1
 end

-function get_clustids(ends::Array{Int64,1})
+function get_clustids(ends::Array{Int,1})
 clustids=collect(1:size(data,2))
 j=1
 for i in 1:size(data,2)
@@ -61,12 +61,12 @@ function get_clustids(ends::Array{Int64,1})
 end

 """
-run_clust_hierarchical_partitional(data::Array, n_seg::Int64)
+run_clust_hierarchical_partitional(data::Array, n_seg::Int)
 !!! Not yet proven
 Usees provided data and number of segments to aggregate them together
 """
 function run_clust_hierarchical_partitional(data::Array,
-n_seg::Int64)
+n_seg::Int)
 _dist= SqEuclidean()
 #Assign each timeperiod it's own cluster
 clustids=collect(1:size(data,2))
@@ -99,20 +99,20 @@ function run_clust_hierarchical_partitional(data::Array,
 end

 """
-merge_clustids!(clustids::Array{Int64,1},index::Int64)
+merge_clustids!(clustids::Array{Int,1},index::Int)
 Calculate the new clustids by merging the cluster of the index provided with the cluster of index+1
 """
-function merge_clustids!(clustids::Array{Int64,1},index::Int64)
+function merge_clustids!(clustids::Array{Int,1},index::Int)
 clustids[index+1]=clustids[index]
 clustids[index+2:end].-=1
 end

 """
-get_mean_data(data::Array, clustids::Array{Int64,1})
+get_mean_data(data::Array, clustids::Array{Int,1})
 Calculate mean of data: The number of columns is kept the same, mean is calculated for aggregated columns and the same in all with same clustid
 """
 function get_mean_data(data::Array,
-clustids::Array{Int64,1})
+clustids::Array{Int,1})
 mean_data=zeros(size(data))
 for i in 1:size(data,2)
 mean_data[:,i]=mean(data[:,findall(clustids.==clustids[i])], dims=2)
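A quick worked example of the merge semantics documented above, using the `merge_clustids!` body shown in this diff; the starting vector is arbitrary.

```julia
# Each time step starts in its own cluster. Merging at index 2 assigns cluster 2 to the
# neighbour at index 3, then shifts all later cluster ids down by one so they stay consecutive.
clustids = [1, 2, 3, 4, 5]
merge_clustids!(clustids, 2)
@assert clustids == [1, 2, 2, 3, 4]
```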

src/clustering/run_clust.jl

Lines changed: 8 additions & 8 deletions
@@ -1,6 +1,6 @@

 """
-run_clust(data::ClustData;norm_op::String="zscore",norm_scope::String="full",method::String="kmeans",representation::String="centroid",n_clust::Int=5,n_init::Int=100,iterations::Int=300,save::String="",attribute_weights::Dict{String,Float64}=Dict{String,Float64}(),get_all_clust_results::Bool=false,kwargs...)
+run_clust(data::ClustData;norm_op::String="zscore",norm_scope::String="full",method::String="kmeans",representation::String="centroid",n_clust::Int=5,n_init::Int=100,iterations::Int=300,save::String="",attribute_weights::Dict{String,AbstractFloat}=Dict{String,AbstractFloat}(),get_all_clust_results::Bool=false,kwargs...)
 norm_op: "zscore", "01"(not implemented yet)
 norm_scope: "full","sequence","hourly"
 method: "kmeans","kmedoids","kmedoids_exact","hierarchical"
@@ -15,7 +15,7 @@ function run_clust(data::ClustData;
 n_seg::Int=data.T,
 n_init::Int=100,
 iterations::Int=300,
-attribute_weights::Dict{String,Float64}=Dict{String,Float64}(),
+attribute_weights::Dict{String,AbstractFloat}=Dict{String,AbstractFloat}(),
 save::String="",#QUESTION dead?
 get_all_clust_results::Bool=false,
 kwargs...
@@ -62,7 +62,7 @@ function run_clust(data_norm_merged::ClustDataMerged;
 n_clust::Int=5,
 n_init::Int=100,
 iterations::Int=300,
-orig_k_ids::Array{Int64,1}=Array{Int64,1}(),
+orig_k_ids::Array{Int,1}=Array{Int,1}(),
 kwargs...)

 method: "kmeans","kmedoids","kmedoids_exact","hierarchical"
@@ -75,13 +75,13 @@ function run_clust(data_norm_merged::ClustDataMerged,
 n_clust::Int=5,
 n_init::Int=100,
 iterations::Int=300,
-orig_k_ids::Array{Int64,1}=Array{Int64,1}(),
+orig_k_ids::Array{Int,1}=Array{Int,1}(),
 kwargs...)
 # initialize data arrays
-centers = Array{Array{Float64},1}(undef,n_init)
+centers = Array{Array{AbstractFloat},1}(undef,n_init)
 clustids = Array{Array{Int,1},1}(undef,n_init)
-weights = Array{Array{Float64},1}(undef,n_init)
-cost = Array{Float64,1}(undef,n_init)
+weights = Array{Array{AbstractFloat},1}(undef,n_init)
+cost = Array{AbstractFloat,1}(undef,n_init)
 iter = Array{Int,1}(undef,n_init)

 # clustering
@@ -346,7 +346,7 @@ end
 Helper function to run run_clust_hierarchical_centroids and run_clust_hierarchical_medoid
 """
 function run_clust_hierarchical(
-data::Array{Float64,2},
+data::Array{AbstractFloat,2},
 n_clust::Int,
 iterations::Int;
 _dist::SemiMetric = SqEuclidean()
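One more hedged sketch, exercising the re-typed `attribute_weights` keyword from the docstring above. As with `attribute_weighting`, the dictionary is constructed explicitly as `Dict{String,AbstractFloat}` to match the annotation in this commit; the attribute names and keyword values are illustrative only.

```julia
# Hierarchical clustering into 8 representative periods with per-attribute weighting,
# continuing with the ts_input_data loaded in the earlier sketch.
clust_res_h = run_clust(ts_input_data;
    method = "hierarchical",
    representation = "centroid",
    n_clust = 8,
    attribute_weights = Dict{String,AbstractFloat}("solar" => 2.0, "el_demand" => 0.5))
```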

src/clustering/shape_based/cluster_gen_dbaclust_parallel.jl

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ writetable(joinpath("outfiles",string("parameters_dtw_",region,".txt")),df)

 # Function that can be an input to pmap

-@everywhere function dbac_par_sc(n_clust::Int,i::Int,rad_sc::Int,seq::Array{Float64,2},n_init::Int,iterations::Int,inner_iterations::Int) # function to use with pmap to parallelize sc band calculation
+@everywhere function dbac_par_sc(n_clust::Int,i::Int,rad_sc::Int,seq::Array{AbstractFloat,2},n_init::Int,iterations::Int,inner_iterations::Int) # function to use with pmap to parallelize sc band calculation

 rmin,rmax=sakoe_chiba_band(rad_sc,24)
