Skip to content

Commit

Permalink
Remove duplicate runs from joyrun (#757)
Browse files Browse the repository at this point in the history
* Remove duplicate runs from Joyrun and support a user-defined duplicate-detection threshold on the command line

* Reformat run_page/joyrun_sync.py using Black to comply with the GitHub merge rules
  • Loading branch information
simongong authored Dec 30, 2024
1 parent 73489da commit 33d7b31
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions run_page/joyrun_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def login_by_phone(self):

def get_runs_records_ids(self):
payload = {
"year": 0,
"year": 0, # earliest year to fetch: e.g. 2023 means fetch records from 2023 through the current year; 0 means fetch all records.
}
r = self.session.post(
f"{self.base_url}/userRunList.aspx",
Expand Down Expand Up @@ -332,10 +332,29 @@ def get_all_joyrun_tracks(self, old_tracks_ids, with_gpx=False):
old_gpx_ids = [i.split(".")[0] for i in old_gpx_ids if not i.startswith(".")]
new_run_ids = list(set(run_ids) - set(old_tracks_ids))
tracks = []
seen_runs = {} # Dictionary to keep track of unique runs with start time as key
for i in new_run_ids:
run_data = self.get_single_run_record(i)
track = self.parse_raw_data_to_nametuple(run_data, old_gpx_ids, with_gpx)
tracks.append(track)
start_time = datetime.fromtimestamp(run_data["runrecord"]["starttime"])
distance = run_data["runrecord"]["meter"]

is_duplicate = False
for seen_start in list(seen_runs.keys()):
if abs((start_time - seen_start).total_seconds()) <= threshold:
if distance > seen_runs[seen_start]["distance"]:
seen_runs[seen_start] = {
"run_data": run_data,
"distance": distance,
}
is_duplicate = True
break
if not is_duplicate:
seen_runs[start_time] = {"run_data": run_data, "distance": distance}
for run in seen_runs.values():
track = self.parse_raw_data_to_nametuple(
run["run_data"], old_gpx_ids, with_gpx
)
tracks.append(track)
return tracks


Expand Down Expand Up @@ -440,6 +459,13 @@ def _generate_svg_profile(athlete, min_grid_distance):
action="store_true",
help="from uid and sid for download datas",
)
parser.add_argument(
"--threshold",
dest="threshold",
help="threshold in seconds to consider runs as duplicates",
type=int,
default=10,
)
options = parser.parse_args()
if options.from_uid_sid:
j = Joyrun.from_uid_sid(
Expand All @@ -455,7 +481,9 @@ def _generate_svg_profile(athlete, min_grid_distance):

generator = Generator(SQL_FILE)
old_tracks_ids = generator.get_old_tracks_ids()
tracks = j.get_all_joyrun_tracks(old_tracks_ids, options.with_gpx)
tracks = j.get_all_joyrun_tracks(
old_tracks_ids, options.with_gpx, options.threshold
)
generator.sync_from_app(tracks)
activities_list = generator.load()
with open(JSON_FILE, "w") as f:
Expand Down

0 comments on commit 33d7b31

Please sign in to comment.