22import os
33from typing import Optional
44
5+ import matplotlib .dates as mdates
6+ import matplotlib .ticker as ticker
57import pandas as pd
8+ import plotly .graph_objs as go
69from git_commits_graph .config import FIGSIZE
10+ from git_commits_graph .config import MAX_NUM_BARS
711from git_commits_graph .config import XTICKS_FMT
812from matplotlib import pyplot as plt
913
1014
11- def plot_changes (commits , git_dir : str , log_scale : bool , aggregate_by : Optional [str ]):
def y_fmt(x, pos):
    """Format a y-axis tick value with a compact ``k``/``M`` suffix.

    The ``(value, position)`` signature is the one used when this function is
    wrapped in ``ticker.FuncFormatter`` in this module.

    The suffix is chosen from the magnitude of ``x`` so that negative ticks
    (removed lines are drawn below zero) are abbreviated the same way as
    positive ones.

    Args:
        x: Tick value.
        pos: Tick position; not used.

    Returns:
        ``x`` rounded to zero decimals, scaled by 1e-6 with an ``M`` suffix
        when ``abs(x) >= 1_000_000``, scaled by 1e-3 with a ``k`` suffix when
        ``abs(x) >= 1_000``, and unscaled without suffix otherwise.
    """
    magnitude = abs(x)
    if magnitude >= 1000000:
        return f"{x * 1e-6:.0f}M"
    if magnitude >= 1000:
        return f"{x * 1e-3:.0f}k"
    return f"{x:.0f}"
23+
24+
# Module-level tick formatter wrapping y_fmt; the matplotlib plotting functions
# in this file install it on the y-axis via ``set_major_formatter``.
y_formatter = ticker.FuncFormatter(y_fmt)
27+
28+
def data_aggregation(aggregate_by, plot_data_add, plot_data_rem):
    """Aggregate the added/removed series into time buckets.

    When ``aggregate_by`` is truthy, both series are aggregated once with that
    period. Otherwise the periods "D", "W", "M" and "Y" are tried in order,
    re-aggregating both series each time, until the added series has at most
    ``MAX_NUM_BARS`` entries or the periods are exhausted. Data that already
    fits is returned untouched.

    Args:
        aggregate_by: Period passed through to ``run_aggregation``, or a falsy
            value to pick the period automatically.
        plot_data_add: Data for added lines.
        plot_data_rem: Data for removed lines.

    Returns:
        Tuple ``(plot_data_add, plot_data_rem)``.
    """
    if aggregate_by:
        return (
            run_aggregation(plot_data_add, col="added", period=aggregate_by),
            run_aggregation(plot_data_rem, col="removed", period=aggregate_by),
        )

    for period in ("D", "W", "M", "Y"):
        # Only the added series decides whether coarser buckets are needed.
        if len(plot_data_add) <= MAX_NUM_BARS:
            break
        plot_data_add = run_aggregation(plot_data_add, col="added", period=period)
        plot_data_rem = run_aggregation(plot_data_rem, col="removed", period=period)
    return plot_data_add, plot_data_rem
45+
46+
47+ def plot_changes_px (
48+ commits ,
49+ git_dir : str ,
50+ log_scale : bool ,
51+ aggregate_by : Optional [str ],
52+ output_file = "out.html" ,
53+ ):
54+ """Plot added/removed lines timeline."""
1255 plot_data_add = commits .added
1356 plot_data_rem = commits .removed
1457
@@ -18,27 +61,89 @@ def plot_changes(commits, git_dir: str, log_scale: bool, aggregate_by: Optional[
1861 plot_data_rem = (plot_data_rem + 1 ).apply (math .log10 )
1962 ylabel = "log number of lines added/removed"
2063
21- if aggregate_by :
22- plot_data_add = run_aggregation (plot_data_add , col = "added" , period = aggregate_by )
23- plot_data_rem = run_aggregation (
24- plot_data_rem , col = "removed" , period = aggregate_by
64+ plot_data_add , plot_data_rem = data_aggregation (
65+ aggregate_by = aggregate_by ,
66+ plot_data_add = plot_data_add ,
67+ plot_data_rem = plot_data_rem ,
68+ )
69+
70+ plot_data_add = pd .DataFrame (plot_data_add )
71+ plot_data_rem = pd .DataFrame (plot_data_rem )
72+ fig = go .Figure ()
73+
74+ fig .add_trace (
75+ go .Bar (
76+ x = plot_data_add .index ,
77+ y = plot_data_add .added ,
78+ name = "added" ,
79+ marker_color = "green" ,
80+ )
81+ )
82+ fig .add_trace (
83+ go .Bar (
84+ x = plot_data_rem .index ,
85+ y = - plot_data_rem .removed ,
86+ name = "removed" ,
87+ marker_color = "red" ,
2588 )
89+ )
90+
91+ fig .update_layout (
92+ title = f"Added/Removed Lines in repo { os .path .basename (git_dir )} " ,
93+ xaxis_title = "Date" ,
94+ yaxis_title = ylabel ,
95+ )
96+ if log_scale :
97+ fig .update_yaxes (type = "log" )
98+ # fig.show()
99+ fig .write_html (output_file )
100+ print (f"Saved to { output_file } " )
101+
102+
def plot_changes(
    commits, git_dir: str, log_scale: bool, aggregate_by: Optional[str], output_file
):
    """Draw the added/removed lines timeline as a matplotlib bar chart.

    Added lines are drawn as green bars and removed lines as red bars with
    negated values. Nothing is returned; the figure is left in matplotlib's
    state.

    Args:
        commits: Object exposing ``added`` and ``removed`` data.
        git_dir: Repository path; its basename is shown in the title.
        log_scale: When true, plot ``log10(value + 1)`` instead of raw counts.
        aggregate_by: Aggregation period forwarded to ``data_aggregation``.
        output_file: Not used by this function.
    """
    added_series = commits.added
    removed_series = commits.removed

    if log_scale:
        added_series = (added_series + 1).apply(math.log10)
        removed_series = (removed_series + 1).apply(math.log10)
        ylabel = "log number of lines added/removed"
    else:
        ylabel = "number of lines added/removed"

    added_series, removed_series = data_aggregation(
        aggregate_by=aggregate_by,
        plot_data_add=added_series,
        plot_data_rem=removed_series,
    )

    added_frame = pd.DataFrame(added_series)
    # Negated so the removed bars point downwards.
    removed_frame = pd.DataFrame(-removed_series)
    fig, ax = plt.subplots(1, 1, figsize=FIGSIZE)

    date_locator = mdates.AutoDateLocator(minticks=3, maxticks=15)
    date_formatter = mdates.ConciseDateFormatter(date_locator)

    ax = added_frame.plot(kind="bar", ax=ax, color="green", label="added")
    ax = removed_frame.plot(kind="bar", ax=ax, color="red", label="removed")

    # NOTE(review): pandas bar plots presumably place bars at integer positions
    # rather than date values, so a date locator may not line up with the
    # bars - confirm the resulting tick labels.
    x_axis = ax.xaxis
    x_axis.set_major_locator(date_locator)
    x_axis.set_major_formatter(date_formatter)
    ax.yaxis.set_major_formatter(y_formatter)

    # format_xticklabels sets the tick labels again with rotation=90.
    plt.xticks(rotation=45)
    format_xticklabels(ax, added_frame)
    plt.ylabel(ylabel)

    ax.set_title(f"Added/Removed Lines in repo {os.path.basename(git_dir)}")
    fig.tight_layout()
38141
39142
40- def plot_total_lines (commits , git_dir , log_scale , aggregate_by ):
41- fig , ax = plt .subplots (1 , 1 , figsize = [8 , 6 ])
143+ def plot_total_lines_px (
144+ commits , git_dir , log_scale , aggregate_by , output_file = "out.html"
145+ ):
146+ """Plot total lines timeline."""
42147
43148 _delta = commits .delta
44149 if aggregate_by :
@@ -55,24 +160,65 @@ def plot_total_lines(commits, git_dir, log_scale, aggregate_by):
55160 plot_data = (plot_data + 1 ).apply (math .log10 )
56161 ylabel = "log number of lines"
57162
58- ax = plot_data .plot ()
163+ fig = go .Figure ()
164+ fig .add_trace (go .Scatter (x = plot_data .index , y = plot_data .values , mode = "lines" ))
165+ fig .update_layout (
166+ title = f"Number of Lines Progress in repo { os .path .basename (git_dir )} " ,
167+ xaxis_title = "Date" ,
168+ yaxis_title = ylabel ,
169+ )
170+ fig .update_yaxes (range = [0 , 1.1 * plot_data .max ()])
171+ # fig.show()
172+ fig .write_html (output_file )
173+ print (f"Saved to { output_file } " )
174+
175+
def plot_total_lines(commits, git_dir, log_scale, aggregate_by, output_file):
    """Plot the running total of lines as a matplotlib line chart.

    The total is the cumulative sum of ``commits.delta``, optionally summed
    per ``aggregate_by`` period first. Nothing is returned; the figure is left
    in matplotlib's state.

    Args:
        commits: Object exposing a ``delta`` series.
        git_dir: Repository path; its basename is shown in the title.
        log_scale: When true, plot ``log10(total + 1)`` instead of the total.
        aggregate_by: Grouping frequency for ``pd.Grouper``; falsy to skip
            aggregation.
        output_file: Not used by this function.
    """
    fig, ax = plt.subplots(1, 1, figsize=[8, 6])

    # Work on a copy so that re-indexing does not modify the caller's data.
    _delta = commits.delta.copy()
    _delta.index = pd.to_datetime(_delta.index, utc=True)
    if aggregate_by:
        # Grouping by key="date" needs the index turned back into a column.
        _delta = _delta.reset_index()
        _delta = _delta.groupby(pd.Grouper(key="date", axis=0, freq=aggregate_by))[
            "delta"
        ].sum()

    plot_data = _delta.cumsum()

    ylabel = "number of lines"
    if log_scale:
        plot_data = (plot_data + 1).apply(math.log10)
        ylabel = "log number of lines"

    ax = plot_data.plot(ax=ax, kind="line")
    locator = mdates.AutoDateLocator(minticks=10, maxticks=15)
    formatter = mdates.ConciseDateFormatter(locator)
    ax.xaxis.set_major_locator(locator)
    # Install the concise formatter itself; AutoDateFormatter expects a
    # locator, not another formatter.
    ax.xaxis.set_major_formatter(formatter)
    ax.yaxis.set_major_formatter(y_formatter)
    plt.ylabel(ylabel)
    ax.set_title(f"Number of Lines Progress in repo {os.path.basename(git_dir)}")
    ax.set_ylim([0, 1.1 * plot_data.max()])
    ax.xaxis_date()
    # Optional. Just rotates x-ticklabels in this case.
    fig.autofmt_xdate()
    fig.tight_layout()
66210
67211
def format_xticklabels(ax, plot_data_add, fmt=XTICKS_FMT):
    """Label the x ticks of ``ax`` with formatted dates.

    Args:
        ax: Axes whose x tick labels are replaced.
        plot_data_add: Data whose index, once reset, provides a ``date``
            column.
        fmt: ``strftime`` pattern applied to each date.
    """
    frame = plot_data_add.reset_index()
    # Parsed as UTC before formatting.
    frame.date = pd.to_datetime(frame.date, utc=True)
    tick_labels = frame.date.dt.strftime(fmt)
    ax.set_xticklabels(tick_labels, rotation=90)
73218
74219
75220def run_aggregation (plot_data_add , col , period ):
221+ """Aggregate data by period."""
76222 plot_data_add = plot_data_add .reset_index ()
77223 plot_data_add .date = pd .to_datetime (plot_data_add .date , utc = True )
78224 plot_data_add = plot_data_add .groupby (pd .Grouper (key = "date" , axis = 0 , freq = period ))[
0 commit comments