diff --git a/components/scmprovider.py b/components/scmprovider.py index acd9134e..cb27ab8d 100644 --- a/components/scmprovider.py +++ b/components/scmprovider.py @@ -30,6 +30,16 @@ def repo_and_commit_to_url(repo, commit): return repo.replace(".git", "") + "/commit/" + commit +def repo_and_compare_url(repo, old_commit, new_commit): + if not repo or not any(h in repo for h in ["github.com", "gitlab.com"]): + return None + return repo.replace(".git", "") + "/compare/" + old_commit + "..." + new_commit + + +# Separator line placed between commit blocks in generated bug comments. +COMMENT_SEPARATOR = "----------------------------------------\n" + + class Commit: def __init__(self, pretty_line): parts = pretty_line.split("|") @@ -42,6 +52,10 @@ def __init__(self, pretty_line): self.files_deleted = [] self.files_other = [] + # The parent (first-parent) revision, resolved to a concrete hash in + # populate_details. Used to build a compare URL whose range includes + # this commit. + self.parent_revision = None self.populated = False def populate_details(self, repo, run): @@ -49,6 +63,7 @@ def populate_details(self, repo, run): return rev_range = [self.revision + "^", self.revision] + self.parent_revision = run(["git", "rev-parse", self.revision + "^"]).stdout.decode().strip() files_changed = run(["git", "diff", "--name-status"] + rev_range).stdout.decode().split("\n") for f in files_changed: @@ -239,65 +254,77 @@ def _print_differing_commit_lists(self, list_a, list_a_name, list_b, list_b_name self.logger.log(" - %s" % c, level=LogLevel.Error) raise Exception(problem) - def build_bug_description(self, list_of_commits, max_length): - # The commits are ordered oldest to newest. - # But when we file a bug we want the newest commit to be at the top. - list_of_commits = copy.deepcopy(list_of_commits) + # ================================================================= + + def _commit_block(self, c, verbosity): + # Render a single commit at the given verbosity: + # 1: revision, author and the revision link + # 2: + authored/committed dates and the commit summary + # 3: + the full description and the lists of changed files + if not c.populated: + raise Exception("Tried to build bug description; but commit details not populated.") + + s = "%s by %s\n" % (c.revision, c.author) + s += c.revision_link + "\n" + + if verbosity >= 2: + s += "Authored: %s\n" % (c.author_date) + s += "Committed: %s\n" % (c.commit_date) + s += "\n" + s += c.summary + "\n" + + if verbosity >= 3: + s += "\n" + s += c.description + "\n" + + if c.files_added: + s += "\n" + s += "Files Added:\n" + for f in c.files_added: + s += " - %s\n" % f + + if c.files_deleted: + s += "\n" + s += "Files Deleted:\n" + for f in c.files_deleted: + s += " - %s\n" % f + + if c.files_modified: + s += "\n" + s += "Files Modified:\n" + for f in c.files_modified: + s += " - %s\n" % f + + if c.files_other: + s += "\n" + s += "Files Changed:\n" + for f in c.files_other: + s += " - %s\n" % f + return s + + # ----------------------------------------------------------------- + + def _build_single_comment_description(self, list_of_commits, max_length): + # Render all the commits into a single comment. + # Reducing the verbosity until the result fits within max_length. + + # The caller has already copied the list; reorder it newest-first for display. list_of_commits.reverse() def _get_details(verbosity): if verbosity == 0: - s = "----------------------------------------\n" + s = COMMENT_SEPARATOR s += "%s commits elided, as they are too long for a bugzilla comment.\n\n" % len(list_of_commits) - s += "----------------------------------------\n" + s += COMMENT_SEPARATOR return s - s = "----------------------------------------\n" + s = COMMENT_SEPARATOR for c in list_of_commits: - if not c.populated: - raise Exception("Tried to build bug description; but commit details not populated.") - - s += "%s by %s\n" % (c.revision, c.author) - s += c.revision_link + "\n" - - if verbosity >= 2: - s += "Authored: %s\n" % (c.author_date) - s += "Committed: %s\n" % (c.commit_date) - s += "\n" - s += c.summary + "\n" - - if verbosity >= 3: - s += "\n" - s += c.description + "\n" - - if c.files_added: - s += "\n" - s += "Files Added:\n" - for f in c.files_added: - s += " - %s\n" % f - - if c.files_deleted: - s += "\n" - s += "Files Deleted:\n" - for f in c.files_deleted: - s += " - %s\n" % f - - if c.files_modified: - s += "\n" - s += "Files Modified:\n" - for f in c.files_modified: - s += " - %s\n" % f - - if c.files_other: - s += "\n" - s += "Files Changed:\n" - for f in c.files_other: - s += " - %s\n" % f - s += "\n----------------------------------------\n" + s += self._commit_block(c, verbosity) + s += "\n" + COMMENT_SEPARATOR return s - # Bugzilla's limit is 65535 details = _get_details(verbosity=3) if len(details) > max_length: details = _get_details(verbosity=2) @@ -305,4 +332,80 @@ def _get_details(verbosity): details = _get_details(verbosity=1) if len(details) > max_length: details = _get_details(verbosity=0) - return details + return [details] + + # ----------------------------------------------------------------- + + def _commit_entry(self, c, max_length): + # A commit block plus its trailing separator, rendered at the highest + # verbosity (3 -> 1) that fits within max_length. + # In theory, it falls back to the least verbose form if even that is + # too long to fit, but this will never happen in practice (knock on wood) + for verbosity in (3, 2, 1): + entry = self._commit_block(c, verbosity) + "\n" + COMMENT_SEPARATOR + if len(entry) <= max_length: + return entry + return entry + + def _chunk_comments(self, entries, max_length, first_header=""): + # Greedily group the already-rendered commit entries into comment-sized + # strings, each kept under max_length. first_header goes on the first + # comment only. entries is assumed non-empty; the first comment is + # seeded with the first entry, then each subsequent entry starts a new + # comment whenever it would overflow the current one. + comments = [] + current = first_header + COMMENT_SEPARATOR + entries[0] + for entry in entries[1:]: + if len(current) + len(entry) > max_length: + comments.append(current) + current = COMMENT_SEPARATOR + entry + else: + current += entry + comments.append(current) + return comments + + def _build_chained_comment_description(self, list_of_commits, max_length, repo_url): + # Always emit full commit details, split across as many comments as needed. + if not list_of_commits: + return [""] + + # The compare URL must start at the parent of the oldest commit so that + # the oldest commit itself is included in the range (a `A...B` compare + # excludes A). + base_revision = list_of_commits[0].parent_revision + newest_revision = list_of_commits[-1].revision + # The caller has already copied the list; reorder it newest-first. + list_of_commits.reverse() + + # Format each commit at full detail, but drop to a lower verbosity for + # any single commit whose block would not otherwise fit in a comment. + # A comment is a leading separator followed by entries, so budget for it. + entry_budget = max_length - len(COMMENT_SEPARATOR) + entries = [self._commit_entry(c, entry_budget) for c in list_of_commits] + + # If the whole thing fits in one comment, post it as-is, with no header. + whole_length = len(COMMENT_SEPARATOR) + sum(len(e) for e in entries) + if whole_length <= max_length: + return [COMMENT_SEPARATOR + "".join(entries)] + + # It needs multiple comments: head the first one with a link to the + # whole commit range so it can be reviewed at a glance. Passing the + # header to _chunk_comments lets its length count against the budget. + compare_url = repo_and_compare_url(repo_url, base_revision, newest_revision) if (repo_url and base_revision) else None + + if compare_url: + first_header = "All %s commits: %s\n(continued in following comments)\n\n" % (len(list_of_commits), compare_url) + else: + first_header = "" + return self._chunk_comments(entries, max_length, first_header) + + # ----------------------------------------------------------------- + + def build_bug_description(self, list_of_commits, max_length, repo_url=None, options=None): + # The commits are ordered oldest to newest. Copy once here since both + # builders reorder the list in place, then dispatch on the task options. + options = options or {} + list_of_commits = copy.deepcopy(list_of_commits) + if 'verbose-diff' in options: + return self._build_chained_comment_description(list_of_commits, max_length, repo_url) + return self._build_single_comment_description(list_of_commits, max_length) diff --git a/tasktypes/commitalert.py b/tasktypes/commitalert.py index 592de037..b08a0fce 100644 --- a/tasktypes/commitalert.py +++ b/tasktypes/commitalert.py @@ -105,7 +105,10 @@ def _process_new_commits(self, library, task, new_commits, all_library_jobs): depends_on = all_library_jobs[0].bugzilla_id if all_library_jobs else None open_dependencies = self.bugzillaProvider.find_open_bugs_info([j.bugzilla_id for j in all_library_jobs]) - description = CommentTemplates.EXAMINE_COMMITS_BODY(library, task, self.scmProvider.build_bug_description(filtered_commits, 65534 - 500), open_dependencies) + commit_chunks = self.scmProvider.build_bug_description(filtered_commits, 65534 - 500, library.repo_url, options=task.options) + description = CommentTemplates.EXAMINE_COMMITS_BODY(library, task, commit_chunks[0], open_dependencies) bugzilla_id = self.bugzillaProvider.file_bug(library, CommentTemplates.EXAMINE_COMMITS_SUMMARY(library, new_commits), description, task.cc, needinfo=task.needinfo, depends_on=depends_on, blocks=task.blocking, moco_confidential=True) + for chunk in commit_chunks[1:]: + self.bugzillaProvider.comment_on_bug(bugzilla_id, chunk) self.dbProvider.create_job(JOBTYPE.COMMITALERT, library, newest_commit.revision, JOBSTATUS.DONE, JOBOUTCOME.ALL_SUCCESS, bugzilla_id) diff --git a/tasktypes/vendoring.py b/tasktypes/vendoring.py index 132845d9..0f7dfe6d 100644 --- a/tasktypes/vendoring.py +++ b/tasktypes/vendoring.py @@ -138,10 +138,15 @@ def _process_new_job(self, library, task, new_version, timestamp, most_recent_jo # File the bug ------------------------ all_upstream_commits, unseen_upstream_commits = self.scmProvider.check_for_update(library, task, new_version, most_recent_job.version if most_recent_job else None) commit_stats = self.mercurialProvider.diff_stats() - commit_details = self.scmProvider.build_bug_description(all_upstream_commits, 65534 - len(commit_stats) - 220) if library.should_show_commit_details else "" + if library.should_show_commit_details: + commit_chunks = self.scmProvider.build_bug_description(all_upstream_commits, 65534 - len(commit_stats) - 220, library.repo_url, options=task.options) + else: + commit_chunks = [""] - created_job.bugzilla_id = self.bugzillaProvider.file_bug(library, CommentTemplates.UPDATE_SUMMARY(library, new_version, timestamp), CommentTemplates.UPDATE_DETAILS(len(all_upstream_commits), len(unseen_upstream_commits), commit_stats, commit_details), task.cc, blocks=task.blocking) + created_job.bugzilla_id = self.bugzillaProvider.file_bug(library, CommentTemplates.UPDATE_SUMMARY(library, new_version, timestamp), CommentTemplates.UPDATE_DETAILS(len(all_upstream_commits), len(unseen_upstream_commits), commit_stats, commit_chunks[0]), task.cc, blocks=task.blocking) self.dbProvider.update_job_add_bug_id(created_job, created_job.bugzilla_id) + for chunk in commit_chunks[1:]: + self.bugzillaProvider.comment_on_bug(created_job.bugzilla_id, chunk) # Address any prior bug --------------- if most_recent_job and not most_recent_job.relinquished: diff --git a/test.py b/test.py index ed608d16..569e1023 100755 --- a/test.py +++ b/test.py @@ -19,6 +19,7 @@ "python_language", "treeherder_api", "library", + "build_bug_description", "lambda_capture", "class_passing", "frequency" diff --git a/tests/build_bug_description.py b/tests/build_bug_description.py new file mode 100644 index 00000000..530bc66f --- /dev/null +++ b/tests/build_bug_description.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import sys +import unittest + +sys.path.append(".") +sys.path.append("..") + +from components.scmprovider import SCMProvider, Commit + +GITHUB_REPO = "https://github.com/org/repo" + + +def make_commit(revision, parent_revision, summary="Do a thing", + description="A longer description of the thing.", + files_modified=None): + # Build a fully-populated Commit without touching a real repository. + c = Commit("%s|2020-01-01 00:00:00 +0000|2020-01-02 00:00:00 +0000" % revision) + c.summary = summary + c.author = "Some Developer " + c.description = description + c.revision_link = "%s/commit/%s" % (GITHUB_REPO, revision) + c.parent_revision = parent_revision + c.files_modified = files_modified if files_modified is not None else ["src/thing.c"] + c.populated = True + return c + + +class TestBuildBugDescription(unittest.TestCase): + @classmethod + def setUpClass(cls): + # build_bug_description / _commit_block don't use the command or logging + # providers, so a bare provider is enough. + cls.scm = SCMProvider({}) + + def _commits(self): + # Ordered oldest -> newest, as build_bug_description expects. + return [ + make_commit("aaaaaaa", "p0000000", summary="First commit"), + make_commit("bbbbbbb", "aaaaaaa", summary="Second commit"), + make_commit("ccccccc", "bbbbbbb", summary="Third commit"), + ] + + # -- _commit_block --------------------------------------------------------- + + def test_commit_block_verbosity_levels(self): + c = make_commit("abcdef0", "0fedcba", summary="Fix the widget", + description="Body of the change.", files_modified=["src/w.c"]) + + v1 = self.scm._commit_block(c, 1) + self.assertIn("abcdef0 by Some Developer", v1) + self.assertIn(c.revision_link, v1) + self.assertNotIn("Authored:", v1) + self.assertNotIn("Fix the widget", v1) + self.assertNotIn("Files Modified", v1) + + v2 = self.scm._commit_block(c, 2) + self.assertIn("Authored:", v2) + self.assertIn("Committed:", v2) + self.assertIn("Fix the widget", v2) + self.assertNotIn("Body of the change.", v2) + self.assertNotIn("Files Modified", v2) + + v3 = self.scm._commit_block(c, 3) + self.assertIn("Fix the widget", v3) + self.assertIn("Body of the change.", v3) + self.assertIn("Files Modified", v3) + self.assertIn("src/w.c", v3) + + def test_commit_block_requires_populated(self): + c = Commit("deadbee|d|d") # not populated + with self.assertRaises(Exception): + self.scm._commit_block(c, 1) + + # -- dispatch -------------------------------------------------------------- + + def test_dispatch_selects_builder(self): + commits = self._commits() + + # Tiny budget makes the two behaviors clearly distinguishable: the + # single-comment builder elides, the chained builder splits instead. + single = self.scm.build_bug_description(commits, 60, GITHUB_REPO, options={}) + self.assertEqual(len(single), 1) + self.assertIn("commits elided", single[0]) + + chained = self.scm.build_bug_description(commits, 60, GITHUB_REPO, options={"verbose-diff": True}) + self.assertGreater(len(chained), 1) + self.assertNotIn("commits elided", "".join(chained)) + + def test_options_default_is_single_comment(self): + commits = self._commits() + # No options passed at all -> single-comment behavior. + result = self.scm.build_bug_description(commits, 60, GITHUB_REPO) + self.assertEqual(len(result), 1) + self.assertIn("commits elided", result[0]) + + # -- single-comment behavior ---------------------------------------------- + + def test_single_comment_full_when_it_fits(self): + commits = self._commits() + result = self.scm.build_bug_description(commits, 65534, GITHUB_REPO, options={}) + self.assertEqual(len(result), 1) + self.assertNotIn("commits elided", result[0]) + # Full detail present, newest commit first. + self.assertIn("Third commit", result[0]) + self.assertIn("Files Modified", result[0]) + self.assertLess(result[0].index("Third commit"), result[0].index("First commit")) + + # -- chained behavior ------------------------------------------------------ + + def test_chained_single_chunk_when_it_fits(self): + commits = self._commits() + result = self.scm.build_bug_description(commits, 65534, GITHUB_REPO, options={"verbose-diff": True}) + self.assertEqual(len(result), 1) + # No continuation header for a single chunk. + self.assertNotIn("continued in following comments", result[0]) + + def test_chained_splits_with_compare_url_including_oldest(self): + commits = self._commits() + result = self.scm.build_bug_description(commits, 200, GITHUB_REPO, options={"verbose-diff": True}) + self.assertGreater(len(result), 1) + # The compare range starts at the parent of the oldest commit, so the + # oldest commit itself is included. + self.assertIn("All 3 commits: %s/compare/p0000000...ccccccc" % GITHUB_REPO, result[0]) + self.assertIn("continued in following comments", result[0]) + + def test_chained_no_compare_url_for_unsupported_host(self): + commits = self._commits() + result = self.scm.build_bug_description(commits, 200, "https://example.com/repo", options={"verbose-diff": True}) + self.assertGreater(len(result), 1) + self.assertNotIn("/compare/", "".join(result)) + + def test_chained_reduces_verbosity_for_oversized_commit(self): + # A single commit whose full detail exceeds the limit is rendered at a + # lower verbosity so it fits, rather than blowing past the comment size. + big = make_commit("abc1234", "par0000", summary="Summary line", + description="D" * 500, files_modified=["src/one.c", "src/two.c"]) + result = self.scm.build_bug_description([big], 200, GITHUB_REPO, options={"verbose-diff": True}) + self.assertEqual(len(result), 1) + self.assertLessEqual(len(result[0]), 200) # it now fits within the limit + self.assertIn("abc1234 by", result[0]) # still identifies the commit + self.assertNotIn("D" * 500, result[0]) # full description dropped + self.assertNotIn("Files Modified", result[0]) # file list dropped + + def test_chained_empty_commit_list(self): + result = self.scm.build_bug_description([], 65534, GITHUB_REPO, options={"verbose-diff": True}) + self.assertEqual(result, [""]) + + +if __name__ == "__main__": + unittest.main(verbosity=2)