88import pandas as pd
99import numpy as np
1010import plotly .express as px
11+ import plotly .graph_objects as go
1112
13+ import networkx as nx
1214from networkx .readwrite .json_graph import adjacency_data
1315
1416from sklearn .datasets import fetch_openml , load_digits , load_iris
2931
3032OPENML_URL = 'https://www.openml.org/search?type=data&sort=runs&status=active'
3133
32- DATA_HELP = f'''
33- To begin select you data source:
34-
35- * Select an example to see how this works
36- * You can submit a csv to try on you data
37- * Or you can use a publicly available dataset from [OpenML]({ OPENML_URL } ).
38- '''
39-
4034DATA_INFO = 'Non-numeric and NaN features get dropped. NaN rows get replaced by mean'
4135
4236MAPPER_HELP = "Experiment with Mapper Settings and hit Run when you're ready!"
5145
5246ABOUT = f'{ GIT_REPO_URL } /blob/main/README.md'
5347
54- APP_DESC = f"""
55- This app leverages the *Mapper Algorithm* from Topological Data Analysis (TDA) to provide an efficient and intuitive way to gain insights from your datasets.
48+ APP_DESC = f'''
49+ This app leverages the *Mapper Algorithm* from Topological Data Analysis
50+ (TDA) to provide an efficient and intuitive way to gain insights from your
51+ datasets.
5652
57- For more details:
58- **{ GIT_REPO_URL } **.
59- """
53+ More details on **[GitHub]({ GIT_REPO_URL } )**.
54+ '''
6055
6156# V_* are reusable values for widgets
6257
8277
8378V_DATA_SUMMARY_BINS = 5
8479
85- # VD_ are reusable default values for widgets
80+ # VD_* are reusable default values for widgets
8681
8782VD_SEED = 42
8883
8984VD_3D = False
9085
91- # K_ are reusable keys for widgets
86+ # K_* are reusable keys for widgets
9287
9388K_UPLOADER = 'key_uploader'
9489
116111
117112K_DATA_SUMMARY = 'key_data_summary'
118113
119- # S_ are reusable manually managed stored objects
114+ # S_* are reusable manually managed stored objects
120115
121116S_RESULTS = 'stored_results'
122117
123- # T_ are for call triggers
124-
125- T_RENDER_MAPPER = True
126-
127- T_DRAW_MAPPER = True
128-
129118
130119class Results :
131120
@@ -173,8 +162,8 @@ def mapper_warning(nodes_num):
173162 return f'''
174163 ⚠️ This graph contains { nodes_num } nodes,
175164 which is more than the maximum allowed of { MAX_NODES } .
176- This may take time to display, make your browser run slow or either crash.
177- Are you sure you want to proceed?
165+ This may take time to display, make your browser run slow or either
166+ crash. Are you sure you want to proceed?
178167 '''
179168
180169
@@ -187,7 +176,8 @@ def data_caption(df_X, df_y):
187176 return 'No data source found'
188177 if df_y .empty :
189178 return f'{ len (df_X )} instances, { len (df_X .columns )} features'
190- return f'{ len (df_X )} instances, { len (df_X .columns )} + { len (df_y .columns )} features'
179+ return f'''{ len (df_X )} instances,
180+ { len (df_X .columns )} + { len (df_y .columns )} features'''
191181
192182
193183def fix_data (data ):
@@ -282,31 +272,42 @@ def add_download_graph():
282272
def add_graph_caption():
    """Show a caption with the node and edge counts of the Mapper graph.

    The graph is read from the results object stored in the session state.
    When no graph is stored, both counts are reported as zero.
    """
    graph = st.session_state[S_RESULTS].mapper_graph
    if graph is None:
        nodes_num, edges_num = 0, 0
    else:
        nodes_num = graph.number_of_nodes()
        edges_num = graph.number_of_edges()
    st.caption(f'{nodes_num} nodes, {edges_num} edges')
281+
282+
def add_graph_hist():
    """Plot a histogram of the per-node connected-component KPI.

    For every node ``u`` of the stored Mapper graph the KPI is
    ``1 / |cc(u)|``, where ``cc(u)`` is the connected component that
    contains ``u``. When no graph is stored in the session state the
    histogram is drawn from an empty data frame. A caption with a help
    tooltip describing the KPI is written below the chart.
    """
    mapper_graph = st.session_state[S_RESULTS].mapper_graph
    kpi_values = []
    if mapper_graph is not None:
        # A component with n nodes contributes n entries equal to 1 / n,
        # i.e. exactly one entry per node of the graph.
        kpi_values = [
            1.0 / len(cc)
            for cc in nx.connected_components(mapper_graph)
            for _ in cc
        ]
    df_kpi = pd.DataFrame({
        'kpi': kpi_values
    })
    fig = px.histogram(df_kpi, x='kpi', height=250, nbins=10)
    fig.update_layout(
        margin=dict(l=0, r=0, t=0, b=0, pad=0),
        xaxis_visible=True,
        xaxis_title_standoff=0,
        xaxis_title=None,
        yaxis_title_standoff=0,
        yaxis_visible=True,
        yaxis_title=None)
    st.plotly_chart(fig, use_container_width=True)
    # The explanation is passed as the caption's help tooltip.
    _help = (
        'The plot shows the histogram of $kpi(u) = 1 / |cc(u)|$ '
        'where $u$ is a node of the Mapper graph and $cc(u)$ is the '
        'connected component of $u$.'
    )
    st.caption('kpi = 1 / connected component size', help=_help)
310311
311312
312313def add_data_source_csv ():
@@ -434,8 +435,10 @@ def add_mapper_settings():
434435 if run :
435436 with st .spinner ('⏳ Computing Mapper...' ):
436437 compute_mapper ()
437- with col_1 :
438+ with col_0 :
438439 add_graph_caption ()
440+ with col_1 :
441+ add_graph_hist ()
439442 with col1_1 :
440443 add_download_graph ()
441444
@@ -520,7 +523,6 @@ def render_mapper_proceed():
520523 colors = X ,
521524 seed = seed )
522525 st .session_state [S_RESULTS ].set_mapper_plot (mapper_plot )
523- st .session_state [T_RENDER_MAPPER ] = False
524526 draw_mapper ()
525527
526528
@@ -534,7 +536,6 @@ def draw_mapper():
534536 mapper_fig = mapper_plot .plot ()
535537 mapper_fig .update_layout (uirevision = 'constant' )
536538 st .session_state ['mapper_fig' ] = mapper_fig
537- st .session_state [T_DRAW_MAPPER ] = False
538539
539540
540541def get_colors_data_summary ():
@@ -562,17 +563,22 @@ def get_colors_data_summary():
562563 return colors
563564
564565
565- def add_data_tools ():
566+ def add_plot_setting ():
567+ seed = st .number_input (
568+ 'Seed' ,
569+ value = VD_SEED ,
570+ key = K_SEED )
571+ st .toggle (
572+ 'Enable 3D' ,
573+ value = VD_3D ,
574+ key = K_ENABLE_3D )
566575 df_X = st .session_state [S_RESULTS ].df_X
567- if df_X is None :
568- return
569- df_y = st .session_state [S_RESULTS ].df_y
570- df_summary = st .session_state [S_RESULTS ].df_summary
571- if df_summary is None :
572- df_summary = pd .DataFrame ()
573-
574- def _trigger_draw_mapper ():
575- st .session_state [T_DRAW_MAPPER ] = True
576+ df_summary = pd .DataFrame ()
577+ if df_X is not None :
578+ df_y = st .session_state [S_RESULTS ].df_y
579+ df_summary = st .session_state [S_RESULTS ].df_summary
580+ if df_summary is None :
581+ df_summary = pd .DataFrame ()
576582 st .data_editor (
577583 df_summary ,
578584 height = 250 ,
@@ -582,36 +588,28 @@ def _trigger_draw_mapper():
582588 column_config = {
583589 V_DATA_SUMMARY_HIST : st .column_config .BarChartColumn (
584590 width = 'small' ),
591+ V_DATA_SUMMARY_FEAT : st .column_config .TextColumn (
592+ width = 'small' ,
593+ disabled = True ),
594+ V_DATA_SUMMARY_COLOR : st .column_config .CheckboxColumn (
595+ width = 'small' ,
596+ disabled = False )
585597 },
586- key = K_DATA_SUMMARY ,
587- on_change = _trigger_draw_mapper )
598+ key = K_DATA_SUMMARY )
599+ update = st .button (
600+ '🌊 Update Rendering' ,
601+ use_container_width = True )
602+ if update :
603+ render_mapper ()
588604
589605
590- def add_plot_setting ():
591- def _trigger_draw_mapper ():
592- st .session_state [T_DRAW_MAPPER ] = True
593- def _trigger_render_mapper ():
594- st .session_state [T_RENDER_MAPPER ] = True
595- seed = st .number_input (
596- 'Seed' ,
597- value = VD_SEED ,
598- key = K_SEED ,
599- on_change = _trigger_draw_mapper )
600- st .toggle (
601- 'Enable 3D' ,
602- value = VD_3D ,
603- key = K_ENABLE_3D ,
604- on_change = _trigger_render_mapper )
605- mapper_graph = st .session_state [S_RESULTS ].mapper_graph
606606
607607
608608def add_graph_plot ():
609- if 'mapper_fig' not in st .session_state :
610- return
611609 mapper_graph = st .session_state [S_RESULTS ].mapper_graph
612- if mapper_graph is None :
613- return
614- mapper_fig = st .session_state ['mapper_fig' ]
610+ mapper_fig = go . Figure ()
611+ if 'mapper_fig' in st . session_state :
612+ mapper_fig = st .session_state ['mapper_fig' ]
615613 st .plotly_chart (
616614 mapper_fig ,
617615 use_container_width = True ,
@@ -633,20 +631,15 @@ def add_data():
633631 with col_1 :
634632 st .markdown ('####' )
635633 st .dataframe (df_all , height = 250 , use_container_width = True )
636- st .caption (cap )
634+ st .caption (cap , help = DATA_INFO )
637635
638636
def add_rendering():
    """Lay out the rendering section of the page.

    Writes the section heading, then splits the area into two columns with
    a 2:4 width ratio: the plot settings go in the first column and the
    Mapper graph plot in the second.
    """
    st.markdown('### 🎨 Rendering')
    settings_col, plot_col = st.columns([2, 4])
    with settings_col:
        add_plot_setting()
    with plot_col:
        add_graph_plot()
651644
652645
@@ -686,7 +679,7 @@ def main():
686679 add_rendering ()
687680 st .markdown (f'''
688681 ---
689- If you found this app useful please consider leaving a :star: on { GIT_REPO_URL }
682+ If you find this app useful, please consider leaving a :star: on **[GitHub]( { GIT_REPO_URL } )**.
690683 ''' )
691684
692685
0 commit comments