changeset 2:19156ccdc3e1

Handle multiple commits at the same timestamp. Yes, that happens.
author Ludovic Chabant <ludovic@chabant.com>
date Tue, 05 Apr 2016 22:10:29 -0700
parents e9f44d2deb94
children 7d99080f276f
files hggit_sync.py
diffstat 1 files changed, 79 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/hggit_sync.py	Fri Feb 19 16:01:24 2016 -0800
+++ b/hggit_sync.py	Tue Apr 05 22:10:29 2016 -0700
@@ -37,29 +37,77 @@
 def build_commit_map(commits1, commits2):
     commits1 = sorted(commits1, key=lambda c: c.timestamp)
     commits2 = sorted(commits2, key=lambda c: c.timestamp)
-    commit_map = dict(map(lambda c: (c.timestamp, (c, None)), commits1))
+
+    # Build the commit map with the "left" commits' info.
+    commit_map = {}
+    for c1 in commits1:
+        if c1.timestamp not in commit_map:
+            commit_map[c1.timestamp] = [(c1, None)]
+        else:
+            # Each entry in the map is a list in case we have commits at the
+            # same timestamp. It's rare, but it happens, especially with
+            # people who have some fucked-up Git-fu.
+            print("Conflicting timestamps between %s and %s" %
+                    (c1.nodeid, commit_map[c1.timestamp][0][0].nodeid))
+            commit_map[c1.timestamp].append((c1, None))
+
+    # Now put the "easy" matches from the "right" commits.
     orphan_commits2 = []
     print("Building commit map...")
     for c in commits2:
-        entry = commit_map.get(c.timestamp, (None, None))
-        entry = (entry[0], c)
-        commit_map[c.timestamp] = entry
-        if entry[0] is None:
+        entry = commit_map.get(c.timestamp)
+        if entry is None:
+            # No "left" commit had this timestamp... we'll have an orphan.
+            entry = [(None, None)]
+            commit_map[c.timestamp] = entry
+
+        # Add the commit info.
+        idx = 0
+        if len(entry) > 1:
+            for i, e in enumerate(entry):
+                if e[0] and e[0].description.strip() == c.description.strip():
+                    idx = i
+                    break
+        if entry[idx][1] is None:
+            entry[idx] = (entry[idx][0], c)
+        else:
+            print("Attempting to match 2 commits (%s and %s) to the same base "
+                    "commit %s... creating orphan instead." %
+                    (entry[idx][1].nodeid, c.nodeid, entry[idx][0].nodeid))
+            entry.append((None, c))
+            idx = len(entry) - 1
+        if entry[idx][0] is None:
             orphan_commits2.append(c)
 
-    if orphan_commits2:
+    orphan_commits1 = []
+    for entry in commit_map.values():
+        for e in entry:
+            if e[1] is None:
+                orphan_commits1.append(e[0])
+
+    if orphan_commits1 or orphan_commits2:
         print("Fixing orphaned commits...")
         did_fix = True
-        orphan_commits1 = [e[0] for e in commit_map.values() if e[1] is None]
         while did_fix:
             did_fix = False
             for c2 in orphan_commits2:
                 for c1 in orphan_commits1:
                     if c1.description.strip() == c2.description.strip():
                         print("Mapping '%s' to '%s'" % (c1.nodeid, c2.nodeid))
-                        print("  Similar description: %s" % c1.description)
+                        print("  Same description: %s" % c1.description)
                         print("  Timestamp difference: %d" % (c2.timestamp - c1.timestamp))
-                        commit_map[c1.timestamp] = (c1, c2)
+                        entry = commit_map[c1.timestamp]
+                        for i, e in enumerate(entry):
+                            if e[0] and e[0].nodeid == c1.nodeid:
+                                entry[i] = (c1, c2)
+                                break
+                        entry = commit_map[c2.timestamp]
+                        for i, e in enumerate(entry):
+                            if e[1] and e[1].nodeid == c2.nodeid:
+                                assert e[0] is None
+                                del entry[i]
+                        if len(entry) == 0:
+                            del commit_map[c2.timestamp]
                         orphan_commits1.remove(c1)
                         orphan_commits2.remove(c2)
                         did_fix = True
@@ -67,6 +115,10 @@
                 if did_fix:
                     break
 
+        if orphan_commits1 or orphan_commits2:
+            print("Still have %d and %d orphaned commits." %
+                    (len(orphan_commits1), len(orphan_commits2)))
+
     return commit_map
 
 
@@ -94,7 +146,8 @@
     git_repo = os.path.join(hg_repo, '.hg', 'git')
     if res.rebuild:
         print("Removing existing Git repo...")
-        shutil.rmtree(git_repo)
+        if os.path.isdir(git_repo):
+            shutil.rmtree(git_repo)
         print("Syncing it again into: %s" % git_repo)
         git_output = subprocess.check_output([
             'git', 'clone', '--bare', res.rebuild, git_repo])
@@ -116,24 +169,26 @@
     os.chdir(hg_repo)
 
     commit_map = build_commit_map(git_commits, hg_commits)
-    for key, val in commit_map.iteritems():
-        if val[0] is None:
-            print("Mercurial commit '%s' (%s) has no Git mirror yet: %s" %
-                  (val[1].nodeid, val[1].timestamp, val[1].description))
-        if val[1] is None:
-            print("Git commit '%s' (%s) is new: %s" %
-                  (val[0].nodeid, val[0].timestamp, val[0].description))
+    for key, vals in commit_map.iteritems():
+        for val in vals:
+            if val[0] is None:
+                print("Mercurial commit '%s' (%s) has no Git mirror yet: %s" %
+                      (val[1].nodeid, val[1].timestamp, val[1].description))
+            if val[1] is None:
+                print("Git commit '%s' (%s) is new: %s" %
+                      (val[0].nodeid, val[0].timestamp, val[0].description))
 
     map_file = res.mapfile or os.path.join(hg_repo, '.hg', 'git-mapfile')
     print("Saving map file: %s" % map_file)
     with codecs.open(map_file, 'w', encoding='utf8') as fp:
-        for key, val in commit_map.iteritems():
-            if val[0] is None or val[1] is None:
-                continue
-            fp.write(val[0].nodeid)
-            fp.write(' ')
-            fp.write(val[1].nodeid)
-            fp.write('\n')
+        for key, vals in commit_map.iteritems():
+            for val in vals:
+                if val[0] is None or val[1] is None:
+                    continue
+                fp.write(val[0].nodeid)
+                fp.write(' ')
+                fp.write(val[1].nodeid)
+                fp.write('\n')
 
 
 if __name__ == '__main__':