Skip to content

Commit 6f1967d

Browse files
committed
fix(toggl): disambiguate entries that seem like duplicates to Harvest
Since Harvest data doesn't contain the start/end times of time entries, entries on the same day, for the same person, with the same total duration and description/notes can appear as duplicates. This change fixes the notes for all such seemingly duplicate entries by appending a counter, allowing Harvest to disambiguate them.
1 parent d836f1b commit 6f1967d

File tree

2 files changed

+41
-8
lines changed

2 files changed

+41
-8
lines changed

compiler_admin/services/toggl.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,20 @@ def convert_to_harvest(
128128
info = project_info()
129129
source["Project"] = source["Project"].apply(lambda x: info.get(key=x, default=x))
130130

131+
# find duplicates based on a subset of columns
132+
cols = ["Date", "Hours", "First name", "Last name", "Notes"]
133+
is_duplicate = source.duplicated(subset=cols, keep=False)
134+
135+
if is_duplicate.any():
136+
# Create a counter for the duplicate rows
137+
counter = source[is_duplicate].groupby(cols).cumcount() + 1
138+
group_size = source[is_duplicate].groupby(cols)["Notes"].transform("size")
139+
140+
# Update the 'Notes' column with the counter
141+
source.loc[is_duplicate, "Notes"] = (
142+
source.loc[is_duplicate, "Notes"] + " (" + counter.astype(str) + "/" + group_size.astype(str) + ")"
143+
)
144+
131145
files.write_csv(output_path, source, columns=output_cols)
132146

133147

tests/services/test_toggl.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,27 @@
1-
from datetime import datetime, timedelta, date
2-
from io import BytesIO, StringIO
31
import math
42
import sys
3+
from datetime import date, datetime, timedelta
4+
from io import BytesIO, StringIO
55
from pathlib import Path
66
from tempfile import NamedTemporaryFile
77

88
import pandas as pd
99
import pytest
1010

1111
import compiler_admin.services.toggl
12+
from compiler_admin.services.toggl import CONVERTERS, HARVEST_COLUMNS, JUSTWORKS_COLUMNS, TOGGL_COLUMNS
13+
from compiler_admin.services.toggl import __name__ as MODULE
1214
from compiler_admin.services.toggl import (
13-
CONVERTERS,
14-
__name__ as MODULE,
1515
_get_first_name,
1616
_get_last_name,
1717
_prepare_input,
1818
_str_timedelta,
1919
convert_to_harvest,
2020
convert_to_justworks,
2121
download_time_entries,
22+
files,
2223
lock_time_entries,
2324
summarize,
24-
TOGGL_COLUMNS,
25-
HARVEST_COLUMNS,
26-
JUSTWORKS_COLUMNS,
27-
files,
2825
)
2926

3027

@@ -209,6 +206,28 @@ def test_convert_to_harvest_sample(toggl_file, harvest_file, mock_google_user_in
209206
assert output_df["Client"].eq("Test Client 123").all()
210207

211208

209+
def test_convert_to_harvest_with_duplicates(mock_google_user_info):
    """Seemingly duplicate entries (same date/person/duration/notes) get a (k/n) suffix.

    Harvest output has no start/end times, so two 1-hour entries with the same
    description on the same day would otherwise be indistinguishable.
    """
    mock_google_user_info.return_value = {"First Name": "Test", "Last Name": "User"}

    raw_csv = """Email,Project,Task,Client,Start date,Start time,Duration,Description
test@example.com,Compiler,Backend,ACME,2025-11-18,09:00:00,01:00:00,A task
test@example.com,Compiler,Backend,ACME,2025-11-18,10:00:00,01:00:00,A task
test@example.com,Compiler,Backend,ACME,2025-11-18,11:00:00,02:00:00,Another task
"""
    src = StringIO(raw_csv)
    dest = StringIO()

    convert_to_harvest(source_path=src, output_path=dest, client_name="ACME")

    dest.seek(0)
    result = pd.read_csv(dest)

    # Rows 0 and 1 are the ambiguous pair and get a counter; row 2 is untouched.
    expected_notes = [
        "[Backend] A task (1/2)",
        "[Backend] A task (2/2)",
        "[Backend] Another task",
    ]
    for row, note in enumerate(expected_notes):
        assert result.loc[row, "Notes"] == note
230+
212231
def test_convert_to_justworks_mocked(toggl_file, spy_files):
213232
convert_to_justworks(toggl_file)
214233

0 commit comments

Comments
 (0)