Skip to content

Commit 5efa413

Browse files
Update ClustResult, AbstractClustResult, ClustResultAll. Update corresponding constructors and all methods that use these.
1 parent 4e58950 commit 5efa413

File tree

10 files changed

+97
-84
lines changed

10 files changed

+97
-84
lines changed

docs/src/clust.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,15 @@ FullInputData
2929
ClustData
3030
ClustDataMerged
3131
ClustResultAll
32-
ClustResultBest
33-
ClustResultSimple
32+
ClustResult
3433
```
3534

3635
## Example running clustering
3736
```@example
3837
using ClustForOpt
3938
# load ts-input-data
4039
ts_input_data = load_timeseries_data(normpath(joinpath(@__DIR__,"..","..","data","TS_GER_1")); T=24, years=[2016])
41-
ts_clust_data = run_clust(ts_input_data).best_results
40+
ts_clust_data = run_clust(ts_input_data).clust_data
4241
using Plots
4342
plot(ts_clust_data.data["solar-germany"], legend=false, linestyle=:solid, width=3, xlabel="Time [h]", ylabel="Solar availability factor [%]")
4443
savefig("clust.svg")

examples/workflow_example_bat.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ end
1717

1818
# optimization
1919

20-
opt_res = run_opt("battery",clust_res_ar[2].best_results)
21-
#opt_res = run_opt("gas_turbine",clust_res.best_results[5])
20+
opt_res = run_opt("battery",clust_res_ar[2].clust_data)
21+
#opt_res = run_opt("gas_turbine",clust_res.clust_data[5])
2222

2323
###
2424
# run optimization for all k=1:9
2525
opt_res_all = []
2626
obj=[]
2727
for i=1:2
28-
push!(opt_res_all,run_opt("battery", clust_res_ar[i].best_results))
28+
push!(opt_res_all,run_opt("battery", clust_res_ar[i].clust_data))
2929
push!(obj,opt_res_all[i].obj)
3030
end
3131
# run reference case

examples/workflow_example_extr.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ ts_input_data = load_timeseries_data(data_path; T=24, years=[2015])
1818
ts_clust_res = run_clust(ts_input_data_mod;method="kmeans",representation="centroid",n_init=10,n_clust=5) # default k-means
1919

2020
# representation modification
21-
ts_clust_extr = representation_modification(extr_vals,ts_clust_res.best_results)
21+
ts_clust_extr = representation_modification(extr_vals,ts_clust_res.clust_data)

examples/workflow_introduction.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ end
6666
#############
6767
# Quick example and investigation of the best result:
6868
ts_clust_result = run_clust(ts_input_data; method="kmeans", representation="centroid", n_init=5, n_clust=5) # note that you should use n_init=1000 at least for kmeans.
69-
ts_clust_data = ts_clust_result.best_results
69+
ts_clust_data = ts_clust_result.clust_data
7070
# And some plotting:
7171
plot_comb_solar=plot!(plot_input_solar, ts_clust_data.data["solar-germany"], linestyle=:solid, width=3)
7272
plot_clust_soar=plot(ts_clust_data.data["el_demand-germany"], legend=false, linestyle=:solid, width=3, xlabel="Time [h]", ylabel="Solar availability factor [%]")

src/ClustForOpt.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@ module ClustForOpt
2222
FullInputData,
2323
ClustData,
2424
ClustDataMerged,
25-
ClustResult,
26-
ClustResultSimple,
25+
AbstractClustResult,
2726
ClustResultAll,
28-
ClustResultBest,
27+
ClustResult,
2928
SimpleExtremeValueDescr,
3029
load_timeseries_data,
3130
combine_timeseries_weather_data,
@@ -35,6 +34,7 @@ module ClustForOpt
3534
get_sup_kw_args,
3635
run_clust,
3736
run_opt,
37+
data_type,
3838
get_EUR_to_USD, #TODO Check which of the following should really be exported
3939
z_normalize,
4040
undo_z_normalize,

src/clustering/extreme_vals.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ function input_data_modification(data::ClustData,extr_val_idcs::Array{Int,1})
140140
#just modify the k_ids that are also represented within this clust-data (don't reduce 0 to -1...)
141141
k_ids_dn[findall(data.k_ids.!=0)]=k_ids_dn_data
142142
#return the new Clust Data
143-
return ClustData(data.region,data.years,K_dn,data.T,data_dn,weights_dn,deltas_dn,k_ids_dn;mean=data.mean,sdv=data.sdv)
143+
return ClustData(data.region,data.years,K_dn,data.T,data_dn,weights_dn,k_ids_dn;delta_t=deltas_dn,mean=data.mean,sdv=data.sdv)
144144
end
145145

146146
"""
@@ -186,7 +186,7 @@ function extreme_val_output(data::ClustData,
186186
@error("rep_mod_method - "*rep_mod_method*" - does not exist")
187187
end
188188
delta_t_ed=data.delta_t[:,unique_extr_val_idcs]
189-
extr_vals = ClustData(data.region,data.years,K_ed,data.T,data_ed,weights_ed,delta_t_ed,k_ids_ed;mean=data.mean,sdv=data.sdv)
189+
extr_vals = ClustData(data.region,data.years,K_ed,data.T,data_ed,weights_ed,k_ids_ed;delta_t=delta_t_ed,mean=data.mean,sdv=data.sdv)
190190
return extr_vals
191191
end
192192

@@ -219,7 +219,7 @@ function representation_modification(extr_vals::ClustData,
219219
k_ids_mod=deepcopy(clust_data.k_ids)
220220
# if an original time series period is represented in the extreme values, the new period number of the extreme value (clust_data.K + old number) is assigned to that original time series period - in case of feasibility they are all zero and nothing is changed
221221
k_ids_mod[findall(extr_vals.k_ids.!=0)]=extr_vals.k_ids[findall(extr_vals.k_ids.!=0)].+clust_data.K
222-
return ClustData(clust_data.region,clust_data.years,K_mod,clust_data.T,data_mod,weights_mod,delta_t_mod,k_ids_mod;mean=clust_data.mean,sdv=clust_data.sdv)
222+
return ClustData(clust_data.region,clust_data.years,K_mod,clust_data.T,data_mod,weights_mod,k_ids_mod;delta_t=delta_t_mod,mean=clust_data.mean,sdv=clust_data.sdv)
223223
end
224224

225225
"""

src/clustering/run_clust.jl

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,42 +24,40 @@ function run_clust(data::ClustData;
2424
# When adding new methods: add combination of clust+rep to sup_kw_args
2525
check_kw_args(norm_op,norm_scope,method,representation)
2626

27-
# normalize
28-
# TODO: implement 0-1 normalization and add as a choice to runclust
29-
data_norm = z_normalize(data;scope=norm_scope)
30-
if !isempty(attribute_weights)
31-
data_norm = attribute_weighting(data_norm,attribute_weights)
32-
end
33-
data_norm_merged = ClustDataMerged(data_norm)
34-
3527
#clustering
36-
b_merged, cost, cost_best, iter =run_clust(data_norm_merged, data; method=method, representation=representation, n_clust=n_clust, n_init=n_init, iterations=iterations, orig_k_ids=deepcopy(data.k_ids), kwargs...)
37-
38-
if n_seg!=b_merged.T && n_seg!=0
39-
b_merged=intraperiod_segmentation(b_merged;n_seg=n_seg,norm_scope=norm_scope,iterations=iterations)
40-
else
41-
n_seg=b_merged.T
28+
clust_data, cost, centers_all, weights_all, clustids_all, cost_all, iter_all =run_clust_method(data;norm_op=norm_op, norm_scope=norm_scope, method=method, representation=representation, n_clust=n_clust, n_init=n_init, iterations=iterations, attribute_weights=attribute_weights, orig_k_ids=deepcopy(data.k_ids), kwargs...)
29+
30+
# intra-period segmentation (reduce the number of time steps per cluster - not fully implemented yet)
31+
if n_seg!=data.T && n_seg!=0
32+
clust_data_merged = ClustDataMerged(clust_data)
33+
segmented_merged=intraperiod_segmentation(clust_data_merged;n_seg=n_seg,norm_scope=norm_scope,iterations=iterations)
34+
clust_data = ClustData(segmented_merged)
35+
else # if intra-period segmentation is not used
36+
n_seg=clust_data.T
4237
end
4338

44-
# transfer into ClustData format
45-
best_results = ClustData(b_merged)
39+
# set configuration file
4640
clust_config = set_clust_config(;norm_op=norm_op, norm_scope=norm_scope, method=method, representation=representation, n_clust=n_clust, n_seg=n_seg, n_init=n_init, iterations=iterations, attribute_weights=attribute_weights)
47-
# save all locally converged solutions and the best into a struct
48-
41+
4942
if get_all_clust_results
50-
clust_result = ClustResultAll(best_results,b_merged.k_ids,cost_best,data_norm_merged.data_type,clust_config,b_merged.centers,b_merged.weights,b_merged.k_ids,cost,iter)
43+
# save all locally converged solutions and the best into a struct
44+
clust_result = ClustResultAll(clust_data,cost,clust_config,centers_all,weights_all,clustids_all,cost_all,iter_all)
5145
else
52-
clust_result = ClustResultBest(best_results,b_merged.k_ids,cost_best,data_norm_merged.data_type,clust_config)
46+
# save best locally converged solution into a struct
47+
clust_result = ClustResult(clust_data,cost,clust_config)
5348
end
54-
#TODO save in save file
49+
#TODO save in save file save_clust_result()
5550
return clust_result
5651
end
5752

5853
"""
59-
function run_clust(data_norm_merged::ClustDataMerged;
54+
run_clust_method(data::ClustData;
55+
norm_op::String="zscore",
56+
norm_scope::String="full",
6057
method::String="kmeans",
6158
representation::String="centroid",
6259
n_clust::Int=5,
60+
n_seg::Int=data.T,
6361
n_init::Int=100,
6462
iterations::Int=300,
6563
orig_k_ids::Array{Int,1}=Array{Int,1}(),
@@ -68,16 +66,27 @@ function run_clust(data_norm_merged::ClustDataMerged;
6866
method: "kmeans","kmedoids","kmedoids_exact","hierarchical"
6967
representation: "centroid","medoid"
7068
"""
71-
function run_clust(data_norm_merged::ClustDataMerged,
72-
data::ClustData;
69+
function run_clust_method(data::ClustData;
70+
norm_op::String="zscore",
71+
norm_scope::String="full",
7372
method::String="kmeans",
7473
representation::String="centroid",
7574
n_clust::Int=5,
75+
n_seg::Int=data.T,
7676
n_init::Int=100,
7777
iterations::Int=300,
78+
attribute_weights::Dict{String,Float64}=Dict{String,Float64}(),
7879
orig_k_ids::Array{Int,1}=Array{Int,1}(),
7980
kwargs...)
80-
# initialize data arrays
81+
# normalize
82+
# TODO: implement 0-1 normalization and add as a choice to runclust
83+
data_norm = z_normalize(data;scope=norm_scope)
84+
if !isempty(attribute_weights)
85+
data_norm = attribute_weighting(data_norm,attribute_weights)
86+
end
87+
data_norm_merged = ClustDataMerged(data_norm)
88+
89+
# initialize data arrays (all initial starting points)
8190
centers = Array{Array{Float64},1}(undef,n_init)
8291
clustids = Array{Array{Int,1},1}(undef,n_init)
8392
weights = Array{Array{Float64},1}(undef,n_init)
@@ -97,16 +106,18 @@ function run_clust(data_norm_merged::ClustDataMerged,
97106
centers[i] = resize_medoids(data,centers[i],weights[i])
98107
end
99108
end
100-
# find best
101-
# TODO: write as function
109+
# find best. TODO: write as function
102110
cost_best,ind_mincost = findmin(cost) # along dimension 2, only store index
103111

104112
k_ids=orig_k_ids
105113
k_ids[findall(orig_k_ids.!=0)]=clustids[ind_mincost]
106114
# save in merged format as array
115+
107116
# NOTE if you need clustered data more precise than 8 digits change the following line accordingly
108-
n_digits_data_round=8 # Gurobi throws warning when rounding errors on order~1e-13 are passed in. Rounding errors occur in clustering of many zeros (e.g. solar).
109-
return ClustDataMerged(data_norm_merged.region,data_norm_merged.years,n_clust,data_norm_merged.T,round.(centers[ind_mincost]; digits=n_digits_data_round),data_norm_merged.data_type,weights[ind_mincost],k_ids), cost, cost_best, iter
117+
n_digits_data_round=8 # Gurobi throws warning when rounding errors on order~1e-13 are passed in. Rounding errors occur in clustering of many zeros (e.g. solar).
118+
clust_data_merged = ClustDataMerged(data.region,data.years,n_clust,data.T,round.(centers[ind_mincost]; digits=n_digits_data_round),data_type(data),weights[ind_mincost],k_ids)
119+
clust_data = ClustData(clust_data_merged)
120+
return clust_data, cost_best, centers, weights, clustids, cost, iter
110121
end
111122

112123
"""
@@ -129,7 +140,7 @@ function run_clust(
129140
save::String="",
130141
kwargs...
131142
)
132-
results_ar = Array{ClustResult,1}(undef,length(n_clust_ar))
143+
results_ar = Array{AbstractClustResult,1}(undef,length(n_clust_ar))
133144
for i=1:length(n_clust_ar)
134145
results_ar[i] = run_clust(data;norm_op=norm_op,norm_scope=norm_scope,method=method,representation=representation,n_init=n_init,n_clust=n_clust_ar[i],iterations=iterations,save=save,kwargs...)
135146
end

src/utils/datastructs.jl

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
abstract type InputData end
33
abstract type TSData <:InputData end
44
abstract type OptData <: InputData end
5-
abstract type ClustResult end
5+
abstract type AbstractClustResult end
66

77
"FullInputData"
88
struct FullInputData <: TSData
@@ -53,34 +53,23 @@ struct ClustDataMerged <: TSData
5353
end
5454

5555
"ClustResultAll"
56-
struct ClustResultAll <: ClustResult
57-
best_results::ClustData
58-
best_ids::Array{Int,1}
59-
best_cost::Float64
60-
data_type::Array{String}
61-
clust_config::Dict{String,Any}
62-
centers::Array{Array{Float64},1}
63-
weights::Array{Array{Float64},1}
64-
clustids::Array{Array{Int,1},1}
65-
cost::Array{Float64,1}
66-
iter::Array{Int,1}
56+
struct ClustResultAll <: AbstractClustResult
57+
clust_data::ClustData
58+
cost::Float64
59+
config::Dict{String,Any}
60+
centers_all::Array{Array{Float64},1}
61+
weights_all::Array{Array{Float64},1}
62+
clustids_all::Array{Array{Int,1},1}
63+
cost_all::Array{Float64,1}
64+
iter_all::Array{Int,1}
6765
end
6866

6967
# TODO: not used yet, but maybe best to implement this one later for users who just want to use clustering but do not care about the locally converged solutions
70-
"ClustResultBest"
71-
struct ClustResultBest <: ClustResult
72-
best_results::ClustData
73-
best_ids::Array{Int,1}
74-
best_cost::Float64
75-
data_type::Array{String}
76-
clust_config::Dict{String,Any}
77-
end
78-
79-
"ClustResultSimple"
80-
struct ClustResultSimple <: ClustResult
81-
best_results::ClustData
82-
#TODO: clust_data::ClustData
83-
clust_config::Dict{String,Any}
68+
"ClustResult"
69+
struct ClustResult <: AbstractClustResult
70+
clust_data::ClustData
71+
cost::Float64
72+
config::Dict{String,Any}
8473
end
8574

8675
"SimpleExtremeValueDescr"
@@ -226,8 +215,8 @@ function ClustData(region::String,
226215
T::Int,
227216
data::Dict{String,Array},
228217
weights::Array{Float64},
229-
delta_t::Array{Float64,2},
230218
k_ids::Array{Int,1};
219+
delta_t::Array{Float64,2}=ones(T,K),
231220
mean::Dict{String,Array}=Dict{String,Array}(),
232221
sdv::Dict{String,Array}=Dict{String,Array}()
233222
)
@@ -268,7 +257,7 @@ function ClustData(data::FullInputData,
268257
for (k,v) in data.data
269258
data_reshape[k] = reshape(v,T,K)
270259
end
271-
return ClustData(data.region,data.years,K,T,data_reshape,ones(K),ones(T,K),collect(1:K))
260+
return ClustData(data.region,data.years,K,T,data_reshape,ones(K),collect(1:K))
272261
end
273262

274263
"""

src/utils/utils.jl

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ function z_normalize(data::ClustData;
4040
for (k,v) in data.data
4141
data_norm[k],mean[k],sdv[k] = z_normalize(v,scope=scope)
4242
end
43-
return ClustData(data.region,data.years,data.K,data.T,data_norm,data.weights,data.delta_t,data.k_ids;mean=mean,sdv=sdv)
43+
return ClustData(data.region,data.years,data.K,data.T,data_norm,data.weights,data.k_ids;delta_t=data.delta_t,mean=mean,sdv=sdv)
4444
end
4545

4646
"""
@@ -327,7 +327,7 @@ function run_pure_clust(data::ClustData;
327327
get_all_clust_results::Bool=false,
328328
kwargs...)
329329
clust_result=run_clust(data;norm_op=norm_op,norm_scope=norm_scope,method=method,representation=representation,n_clust=n_clust,n_init=n_init,iterations=iterations,attribute_weights=attribute_weights)
330-
clust_data=clust_result.best_results
330+
clust_data=clust_result.clust_data
331331
mod_data=deepcopy(data.data)
332332
for i in 1:clust_data.K
333333
index=findall(clust_data.k_ids.==i)
@@ -338,5 +338,19 @@ function run_pure_clust(data::ClustData;
338338
end
339339
end
340340
end
341-
return ClustResultSimple(ClustData(data.region, data.years, data.K, data.T, mod_data, data.weights, data.delta_t, data.k_ids), clust_result.clust_config)
341+
return ClustResult(ClustData(data.region, data.years, data.K, data.T, mod_data, data.weights, data.k_ids;delta_t=data.delta_t),clust_result.cost, clust_result.config)
342342
end
343+
344+
"""
345+
data_type(data::ClustData)
346+
Get data_type from a struct ClustData
347+
"""
348+
function data_type(data::ClustData)
349+
n_datasets = length(keys(data.data))
350+
data_type=String[]
351+
for (k,v) in data.data
352+
push!(data_type,k)
353+
end
354+
return data_type
355+
end
356+

test/capacityexpansion.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ using Clp
1010
# run clustering
1111
ts_clust_res = run_clust(ts_input_data;method="kmeans",representation="centroid",n_init=1,n_clust=365) # default k-means
1212
# run optimization
13-
model = run_opt(ts_clust_res.best_results,cep_input_data,Clp.Optimizer)
13+
model = run_opt(ts_clust_res.clust_data,cep_input_data,Clp.Optimizer)
1414
# compare to exact result
1515
exact_res=[70540.26439790576;0.0;8498.278397905757;0.0;80132.88454450261]
1616
@test exact_res ≈ model.variables["CAP"].data[:,1,1] atol=1
@@ -26,13 +26,13 @@ using Clp
2626
ts_full_data = run_clust(ts_input_data;method="hierarchical",representation="centroid",n_init=1,n_clust=30)
2727
## OPTIMIZATION ##
2828
optimizer=Clp.Optimizer
29-
scenarios["$state-$years-co2"] = run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="co2",co2_limit=1000)
30-
scenarios["$state-$years-slack"] = run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="slack",lost_el_load_cost=1e6, lost_CO2_emission_cost=700)
31-
scenarios["$state-$years-ex"] = run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="ex",existing_infrastructure=true)
32-
scenarios["$state-$years-simple"] = run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="simple storage",storage="simple")
33-
scenarios["$state-$years-seasonal"] = run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="seasonal storage",storage="seasonal")
34-
design_result=run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="des&op")
35-
scenarios["$state-$years-des&op"] = run_opt(ts_full_data.best_results,cep_data,design_result.opt_config,get_cep_design_variables(design_result),optimizer;lost_el_load_cost=1e6,lost_CO2_emission_cost=700)
29+
scenarios["$state-$years-co2"] = run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="co2",co2_limit=1000)
30+
scenarios["$state-$years-slack"] = run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="slack",lost_el_load_cost=1e6, lost_CO2_emission_cost=700)
31+
scenarios["$state-$years-ex"] = run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="ex",existing_infrastructure=true)
32+
scenarios["$state-$years-simple"] = run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="simple storage",storage="simple")
33+
scenarios["$state-$years-seasonal"] = run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="seasonal storage",storage="seasonal")
34+
design_result=run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="des&op")
35+
scenarios["$state-$years-des&op"] = run_opt(ts_full_data.clust_data,cep_data,design_result.opt_config,get_cep_design_variables(design_result),optimizer;lost_el_load_cost=1e6,lost_CO2_emission_cost=700)
3636
end
3737
end
3838
#Test transmission for a multi-node scenario
@@ -44,7 +44,7 @@ using Clp
4444
ts_clust_data = run_clust(ts_input_data;method="hierarchical",representation="centroid",n_init=1,n_clust=3)
4545
## OPTIMIZATION ##
4646
optimizer=Clp.Optimizer
47-
scenarios["$state-$years-trans"] = run_opt(ts_clust_data.best_results,cep_data,optimizer;descriptor="trans",transmission=true)
47+
scenarios["$state-$years-trans"] = run_opt(ts_clust_data.clust_data,cep_data,optimizer;descriptor="trans",transmission=true)
4848
end
4949
end
5050
#Test exact values for each of the previously calculated scenarios by comparison with exact scenarios

0 commit comments

Comments
 (0)