From 9117d5e7c8f866c81e4de4c4107425aabe8e5553 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Wed, 20 Dec 2023 00:21:06 -0500 Subject: [PATCH 01/20] Add Remediation Commit Support Signed-off-by: John Mertic --- .gitignore | 6 ++ contrib_check.py | 7 +- contrib_check/commit.py | 54 ++++++++++++-- contrib_check/org.py | 17 +++-- contrib_check/repo.py | 107 ++++++++++++++++++++++++-- requirements.txt | 3 +- tests.py | 161 +++++++++++++++++++++++++++++----------- 7 files changed, 291 insertions(+), 64 deletions(-) diff --git a/.gitignore b/.gitignore index 1d10392..40e8fbe 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +.coverage # PyInstaller # Usually these files are written by a python script from a template @@ -62,3 +63,8 @@ Icon Network Trash Folder Temporary Items .apdisk + +# Don't save output/config files +*.yml +*.csv +dco-signoffs diff --git a/contrib_check.py b/contrib_check.py index 170ab8c..18b66b2 100755 --- a/contrib_check.py +++ b/contrib_check.py @@ -36,11 +36,11 @@ def main(): if 'repo' in config: args.repo = config['repo'] elif 'org' in config: - args.org = config['org'] + args.org = config['org']['name'] repos = [] if args.org: - orgObj = Org(args.org) + orgObj = Org(args.org,load_repos=False) if 'type' in config['org']: orgObj.org_type=config['org']['type'] if 'ignore_repos' in config['org']: @@ -49,7 +49,7 @@ def main(): orgObj.only_repos=config['org']['only_repos'] if 'skip_archived' in config['org']: orgObj.skip_archived=config['org']['skip_archived'] - repos = orgObj.loadRepos() + repos = orgObj.reloadRepos() if args.repo: repos = [Repo(args.repo)] @@ -64,6 +64,7 @@ def main(): else: repoObj.loadPastSignoffs() repoObj.scan() + print(repoObj.remediations) print("This took "+str(datetime.now() - startTime)) diff --git a/contrib_check/commit.py b/contrib_check/commit.py index 3411d9e..98cd885 100644 --- a/contrib_check/commit.py +++ b/contrib_check/commit.py @@ -9,6 +9,8 @@ import re +# third party modules +import yaml import git class Commit(): @@ -20,14 +22,23 @@ class Commit(): create_prior_commits_dir = 'dco-signoffs' + remediation_regex_individual = "^I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)\s*$" + remediation_regex_thirdparty = "^On behalf of (.*) <(.*)>, I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)\s*$" + + allow_remediation_commit_individual = False + allow_remediation_commit_thirdparty = False + + remediations = [] + def __init__(self, git_commit_object, repo_object): self.git_commit_object = git_commit_object self.repo_object = repo_object self.is_merge_commit = len(git_commit_object.parents) > 1 - + self.loadRemediationCommitConfig() + def checkDCOSignoff(self): if self.isDCOSignOffRequired(): - return self.hasDCOSignOff() or self.hasDCOPastSignoff() + return self.hasDCOSignOff() or self.hasDCOPastSignoff() or self.hasRemediation() return True @@ -35,12 +46,45 @@ def isDCOSignOffRequired(self): return not self.is_merge_commit def hasDCOSignOff(self): - return re.search("Signed-off-by: (.+)",self.git_commit_object.message) + return (re.search("Signed-off-by: (.+)",self.git_commit_object.message) != None) def hasDCOPastSignoff(self): for signoff in self.repo_object.past_signoffs: - if re.search(self.git_commit_object.hexsha.encode(),signoff): + print(self.git_commit_object.hexsha.encode()) + print(signoff) + if (re.search(self.git_commit_object.hexsha.encode(),signoff) != None): return True - return False; + return False + + def hasRemediation(self): + return self.repo_object.git_repo_object.git.rev_parse(self.git_commit_object.hexsha, short="7") in self.remediations + + def loadRemediationCommitConfig(self): + try: + with open(self.repo_object.git_repo_object.head.commit.tree[".github/dco.yml"].abspath, 'r') as file: + config = yaml.safe_load(file) + self.allow_remediation_commit_individual = config['allowRemediationCommits']['individual'] if config and 'allowRemediationCommits' in config and 'individual' in config['allowRemediationCommits'] else False + self.allow_remediation_commit_thirdparty = config['allowRemediationCommits']['thirdParty'] if config and 'allowRemediationCommits' in config and 'thirdParty' in config['allowRemediationCommits'] else False + except KeyError as e: + return False + else: + return True + + def isRemediationCommit(self): + isremediationcommit = False + if self.allow_remediation_commit_individual: + for match in re.findall(self.remediation_regex_individual,self.git_commit_object.message,flags=re.I|re.M): + # ensure it's a valid remediation commit by matching the author with the attestaion + if ( match[0] == self.git_commit_object.author.name ) and ( match[1] == self.git_commit_object.author.email ): + self.remediations.append(match[2]) + isremediationcommit = True + if self.allow_remediation_commit_thirdparty: + for match in re.findall(self.remediation_regex_thirdparty,self.git_commit_object.message,flags=re.I|re.M): + # ensure it's a valid remediation commit by matching the author with the attestaion + if ( match[2] == self.git_commit_object.author.name ) and ( match[3] == self.git_commit_object.author.email ): + self.remediations.append(match[4]) + isremediationcommit = True + + return isremediationcommit diff --git a/contrib_check/org.py b/contrib_check/org.py index d8a9654..a085979 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -7,12 +7,13 @@ import os import socket +import re from github import Github, GithubException, RateLimitExceededException from .repo import Repo class Org(): - org_name = '' + __org_name = '' __org_type = 'github' ignore_repos = [] only_repos = [] @@ -28,7 +29,15 @@ def __init__(self,org_name,org_type='github',ignore_repos=[],only_repos=[],skip_ self.skip_archived = skip_archived if load_repos: self.reloadRepos() - + + @property + def org_name(self): + return self.__org_name + + @org_name.setter + def org_name(self, org_name): + self.__org_name = re.sub('^http(s)*://(www\.)*github.com/','',org_name) + @property def org_type(self): return self.__org_type @@ -67,8 +76,6 @@ def reloadRepos(self): return self.repos - def _getGithubReposForOrg(): + def _getGithubReposForOrg(self): g = Github(login_or_token=os.environ['GITHUB_TOKEN'], per_page=1000) return g.get_organization(self.org_name).get_repos() - - diff --git a/contrib_check/repo.py b/contrib_check/repo.py index 41a3edb..e539324 100644 --- a/contrib_check/repo.py +++ b/contrib_check/repo.py @@ -7,22 +7,28 @@ # # Thin layer that uses GitPython.Repo.Base but adds some metadata and the past signoffs # +from __future__ import annotations import os -import git import tempfile import csv import re import shutil +from alive_progress import alive_bar +import git +from git import RemoteProgress + from .commit import Commit class Repo(): name = '' html_url = '' past_signoffs = [] + remediations = [] git_repo_object = None prior_commits_dir = 'dco-signoffs' + remediation_commits_dir = 'remediation-commits' checks = { 'dco': True @@ -47,13 +53,15 @@ def __init__(self, repo_path): self.html_url = repo_path self.name = url_search.group(2) self.__fo = tempfile.TemporaryDirectory() - self.git_repo_object = git.Repo.clone_from(self.html_url,self.__fo.name) + print("Cloning repo %s" %self.html_url) + self.git_repo_object = git.Repo.clone_from(self.html_url,self.__fo.name,progress=GitRemoteProgress()) self.csv_filename = url_search.group(1)+'-'+self.name+'.csv' # local clone elif os.path.isdir(repo_path): self.name = os.path.basename(os.path.realpath(repo_path)) self.git_repo_object = git.Repo(repo_path) self.csv_filename = self.name+'.csv' + self.loadRemediationCommits() def loadPastSignoffs(self, dco_signoffs_directories = ["dco-signoffs"]): try: @@ -67,6 +75,12 @@ def loadPastSignoffs(self, dco_signoffs_directories = ["dco-signoffs"]): print("...invalid or empty repo - skipping") return False + def loadRemediationCommits(self): + for commit in self.git_repo_object.iter_commits(): + commitObj = Commit(commit,self) + if commitObj.isRemediationCommit(): + self.remediations.extend(commitObj.remediations) + def scan(self): for commit in self.git_repo_object.iter_commits(): commitObj = Commit(commit,self) @@ -79,12 +93,6 @@ def csv_filename(self): @csv_filename.setter def csv_filename(self,csvfile): - # remove file if there already - if os.path.isfile(csvfile): - os.remove(csvfile) - - self.__csvfileref = open(csvfile, mode='w') - self.__csv_writer = csv.writer(self.__csvfileref, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) self.__csv_filename = csvfile def __del__(self): @@ -94,6 +102,14 @@ def __del__(self): self.__csvfileref.close() def writeError(self, commit, error_type): + if not self.__csvfileref or not self.__csv_writer: + # remove file if there already + if os.path.isfile(self.__csv_filename): + os.remove(self.__csv_filename) + + self.__csvfileref = open(self.__csv_filename, mode='w') + self.__csv_writer = csv.writer(self.__csvfileref, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) + self.__csv_writer.writerow([ self.name, commit.git_commit_object.hexsha, @@ -108,6 +124,22 @@ def writeError(self, commit, error_type): if error_type == 'dco': self.writeDCOPriorCommitsFile(commit) + def writeIndividualRemediationCommit(self, commit): + if not os.path.exists(self.remediation_commits_dir): + os.mkdir(self.remediation_commits_dir) + + remediationfilename = "{}/{}-{}.txt".format(self.remediation_commits_dir,self.name,commit.git_commit.author.name) + shorthash = self.git_repo_object.git.rev_parse(commit.git_commit_object.hexsha, short="7") + + if not os.path.isfile(remediationfilename): + fh = open(remediationfilename, mode='w+') + fh.write("DCO Remediation Commit for {} <{}>\n\n".format(commit.git_commit_object.author.name,commit.git_commit_object.author.email)) + else: + fh = open(remediationfilename, mode='a') + + fh.write("I, {} <{}>, hereby add my Signed-off-by to this commit: {}\n".format(commit.git_commit_object.author.name,commit.git_commit_object.author.email,self.git_repo_object.git.rev_parse(commit.git_commit_object.hexsha, short="7"))) + fh.close() + def writeDCOPriorCommitsFile(self, commit): if not os.path.exists(self.prior_commits_dir): os.mkdir(self.prior_commits_dir) @@ -124,3 +156,62 @@ def writeDCOPriorCommitsFile(self, commit): fh.write(commit.git_commit_object.hexsha+" "+commit.git_commit_object.message+"\n") fh.close() + +class GitRemoteProgress(git.RemoteProgress): + OP_CODES = [ + "BEGIN", + "CHECKING_OUT", + "COMPRESSING", + "COUNTING", + "END", + "FINDING_SOURCES", + "RECEIVING", + "RESOLVING", + "WRITING", + ] + OP_CODE_MAP = { + getattr(git.RemoteProgress, _op_code): _op_code for _op_code in OP_CODES + } + + def __init__(self) -> None: + super().__init__() + self.alive_bar_instance = None + + @classmethod + def get_curr_op(cls, op_code: int) -> str: + """Get OP name from OP code.""" + # Remove BEGIN- and END-flag and get op name + op_code_masked = op_code & cls.OP_MASK + return cls.OP_CODE_MAP.get(op_code_masked, "?").title() + + def update( + self, + op_code: int, + cur_count: str | float, + max_count: str | float | None = None, + message: str | None = "", + ) -> None: + cur_count = float(cur_count) + max_count = float(max_count) + + # Start new bar on each BEGIN-flag + if op_code & self.BEGIN: + self.curr_op = self.get_curr_op(op_code) + self._dispatch_bar(title=self.curr_op) + + self.bar(cur_count / max_count) + self.bar.text(message) + + # End progress monitoring on each END-flag + if op_code & git.RemoteProgress.END: + self._destroy_bar() + + def _dispatch_bar(self, title: str | None = "") -> None: + """Create a new progress bar""" + self.alive_bar_instance = alive_bar(manual=True, title=title) + self.bar = self.alive_bar_instance.__enter__() + + def _destroy_bar(self) -> None: + """Destroy an existing progress bar""" + self.alive_bar_instance.__exit__(None, None, None) + diff --git a/requirements.txt b/requirements.txt index 098a8ad..a9bfe6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ PyYAML>=5.3.1 -GitPython>=3.1.0 +GitPython>=3.1. +alive-progess>=3.1.5 PyGithub>=1.47 diff --git a/tests.py b/tests.py index 2e3590f..08db15e 100755 --- a/tests.py +++ b/tests.py @@ -9,7 +9,9 @@ import git import unittest +from unittest import mock from unittest.mock import Mock +from unittest.mock import patch from contrib_check.org import Org from contrib_check.repo import Repo @@ -22,7 +24,14 @@ class TestCommit(unittest.TestCase): def setUpClass(self): self._mock_repo = Mock() - + self._mock_repo.heads = Mock() + self._mock_repo.git_repo_object.head = Mock() + self._mock_repo.git_repo_object.head.commit = Mock() + data = [{"name": ".github/dco.yml"}] + self._mock_repo.git_repo_object.head.commit.tree = {x['name']: x for x in data} + self._mock_repo.git_repo_object.head.commit.tree[".github/dco.yml"] = Mock() + self._mock_repo.git_repo_object.head.commit.tree[".github/dco.yml"].abspath = "foo" + self._mock_commit_merge = Mock() self._mock_commit_merge.parents = [1,2,3] @@ -31,57 +40,134 @@ def setUpClass(self): # test for not having a signoff def testHasNoDCOSignOff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = "has no signoff" - self.assertFalse(commit.hasDCOSignOff(), "Commit message didn't have a signoff") + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.message = "has no signoff" + self.assertFalse(commit.hasDCOSignOff(), "Commit message didn't have a signoff") # test for having a signoff def testHasDCOSignOff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = "has a signoff Signed-off-by: John Mertic " - self.assertTrue(commit.hasDCOSignOff(), "Commit message had a signoff") + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.message = "has a signoff Signed-off-by: John Mertic " + self.assertTrue(commit.hasDCOSignOff(), "Commit message had a signoff") def testFoundPastDCOSignoff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + commit.repo_object.past_signoffs = [ + "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() + ] - self.assertTrue(commit.hasDCOPastSignoff(), "Commit message had a past signoff") + self.assertTrue(commit.hasDCOPastSignoff(), "Commit message had a past signoff") def testFoundNoPastDCOSignoff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = 'c1d322dfba0ed7a770d74074990ac51a9efedcd0' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.hexsha = 'c1d322dfba0ed7a770d74074990ac51a9efedcd0' + commit.repo_object.past_signoffs = [ + "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() + ] - self.assertFalse(commit.hasDCOPastSignoff(), "Commit message had a past signoff") + self.assertFalse(commit.hasDCOPastSignoff(), "Commit message had a past signoff") def testDCOSignoffRequiredMergeCommit(self): - commit = Commit(self._mock_commit_merge,self._mock_repo) + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit_merge,self._mock_repo) - self.assertFalse(commit.isDCOSignOffRequired(), "Merge commits don't require a DCO signoff") + self.assertFalse(commit.isDCOSignOffRequired(), "Merge commits don't require a DCO signoff") def testDCOSignoffRequiredNormalCommit(self): - commit = Commit(self._mock_commit,self._mock_repo) + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) - self.assertTrue(commit.isDCOSignOffRequired(), "All non-merge commits require a DCO signoff") + self.assertTrue(commit.isDCOSignOffRequired(), "All non-merge commits require a DCO signoff") def testDCOSignoffCheckMergeCommit(self): - commit = Commit(self._mock_commit_merge,self._mock_repo) - commit.git_commit_object.message = "has no signoff" - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but is a merge commit so that's ok") + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit_merge,self._mock_repo) + commit.git_commit_object.message = "has no signoff" + self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but is a merge commit so that's ok") def testDCOSignoffCheckNormalCommitNoSignoffPastSignoff(self): - commit = Commit(self._mock_commit_merge,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.repo_object.past_signoffs = [ - ['dco-signoffs',"I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() ] - ] - commit.git_commit_object.message = "has no signoff" - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a past DCO signoff so that's ok") + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + commit.repo_object.past_signoffs = [ + "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() + ] + commit.git_commit_object.message = "has no signoff" + self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a past DCO signoff so that's ok") + + def testDCOSignoffCheckNormalCommitNoSignoffHasRemediation(self): + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + commit.git_commit_object.message = "has no signoff" + commit.repo_object.past_signoffs = [] + commit.repo_object.git_repo_object.git.rev_parse = Mock(return_value='0e1fd4b') + commit.remediations.append('0e1fd4b') + self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a remediation commit so that's ok") + + def testIsRemediationCommitIndividual(self): + dcoConfig = ''' +allowRemediationCommits: + individual: true +''' + with patch('builtins.open', mock.mock_open(read_data=dcoConfig)) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.message = ''' +DCO Remediation Commit for lparadkar-rocket + +I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb6 +I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb7 +''' + commit.git_commit_object.author = Mock() + commit.git_commit_object.author.name = 'lparadkar-rocket' + commit.git_commit_object.author.email = 'lparadkar@rocketsoftware.com' + + self.assertTrue(commit.isRemediationCommit()) + self.assertIn('41febb6',commit.remediations) + self.assertIn('41febb7',commit.remediations) + + def testIsRemediationCommitThirdParty(self): + dcoConfig = ''' +allowRemediationCommits: + thirdParty: true +''' + with patch('builtins.open', mock.mock_open(read_data=dcoConfig)) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.message = ''' +Third-Party DCO Remediation Commit for Billy Bob Thorton + +On behalf of Billy Bob Thorton , I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb6 +On behalf of Billy Bob Thorton , I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb7 +''' + commit.git_commit_object.author = Mock() + commit.git_commit_object.author.name = 'lparadkar-rocket' + commit.git_commit_object.author.email = 'lparadkar@rocketsoftware.com' + + self.assertTrue(commit.isRemediationCommit()) + self.assertIn('41febb6',commit.remediations) + self.assertIn('41febb7',commit.remediations) + + def testHasRemediation(self): + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.repo_object.git_repo_object.git.rev_parse = Mock(return_value='1234567') + commit.remediations = ['1234567'] + + self.assertTrue(commit.hasRemediation()) + + def testHasNoRemediation(self): + with patch('builtins.open', mock.mock_open(read_data="")) as m: + commit = Commit(self._mock_commit,self._mock_repo) + commit.repo_object.git_repo_object.git.rev_parse = Mock(return_value='1234567') + commit.remediations = ['123i567'] + + self.assertFalse(commit.hasRemediation()) + class TestOrg(unittest.TestCase): @@ -173,10 +259,6 @@ def testInitGithub(self,mock_method): self.assertEqual(repo.name,"bar") self.assertEqual(repo.html_url,"https://github.com/foo/bar") self.assertEqual(repo.csv_filename,"foo-bar.csv") - self.assertTrue(os.path.isfile("foo-bar.csv")) - - if os.path.isfile("foo-bar.csv"): - os.remove("foo-bar.csv") @unittest.mock.patch.object(git.Repo,'clone_from') def testInitLocal(self,mock_method): @@ -186,10 +268,5 @@ def testInitLocal(self,mock_method): self.assertEqual(repo.html_url,"") self.assertEqual(repo.csv_filename,repo.name+".csv") - self.assertTrue(os.path.isfile(repo.name+".csv")) - - if os.path.isfile(repo.name+".csv"): - os.remove(repo.name+".csv") - if __name__ == '__main__': unittest.main() From 6c1873cd7b2a5b24a78c582432eecbe0e4cd0493 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Wed, 20 Dec 2023 00:27:30 -0500 Subject: [PATCH 02/20] Remove debugging code Signed-off-by: John Mertic --- contrib_check/commit.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib_check/commit.py b/contrib_check/commit.py index 98cd885..dcf195f 100644 --- a/contrib_check/commit.py +++ b/contrib_check/commit.py @@ -50,8 +50,6 @@ def hasDCOSignOff(self): def hasDCOPastSignoff(self): for signoff in self.repo_object.past_signoffs: - print(self.git_commit_object.hexsha.encode()) - print(signoff) if (re.search(self.git_commit_object.hexsha.encode(),signoff) != None): return True From d3fa734e448e0a2c2af6e41e777b357639a508ad Mon Sep 17 00:00:00 2001 From: John Mertic Date: Wed, 20 Dec 2023 00:31:09 -0500 Subject: [PATCH 03/20] Fix typo Signed-off-by: John Mertic --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a9bfe6d..d093850 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ PyYAML>=5.3.1 GitPython>=3.1. -alive-progess>=3.1.5 +alive-progress>=3.1.5 PyGithub>=1.47 From 9f81b6f2cfabdbf94ed50f5f51aa1cac036448ba Mon Sep 17 00:00:00 2001 From: John Mertic Date: Wed, 20 Dec 2023 00:37:12 -0500 Subject: [PATCH 04/20] Fix tests Signed-off-by: John Mertic --- tests.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests.py b/tests.py index 08db15e..7c92d76 100755 --- a/tests.py +++ b/tests.py @@ -198,11 +198,13 @@ def tearDownClass(cls): if os.path.exists("testorg-repo3.csv"): os.remove("testorg-repo3.csv") + @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) def testInitNoLoadRepos(self): org = Org("testorg",load_repos=False) self.assertEqual(org.repos,[]) + @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) def testOrgTypeSetGithubNoTokenDefined(self): names_to_remove = {"GITHUB_TOKEN"} modified_environ = { From 5d71bf06415d082b99920bf08b01dc80f3834e79 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Wed, 20 Dec 2023 00:40:54 -0500 Subject: [PATCH 05/20] Fix action runner Signed-off-by: John Mertic --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 726a93a..6f5ccf3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,9 +19,9 @@ jobs: # Steps represent a sequence of tasks that will be executed as part of the job steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies From aebee9b0a1b1285617e0e3e67ef7ce2ba33cf955 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Wed, 20 Dec 2023 10:41:14 -0500 Subject: [PATCH 06/20] Fix test warnings Signed-off-by: John Mertic --- contrib_check/commit.py | 4 ++-- contrib_check/org.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib_check/commit.py b/contrib_check/commit.py index dcf195f..10e5dd7 100644 --- a/contrib_check/commit.py +++ b/contrib_check/commit.py @@ -22,8 +22,8 @@ class Commit(): create_prior_commits_dir = 'dco-signoffs' - remediation_regex_individual = "^I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)\s*$" - remediation_regex_thirdparty = "^On behalf of (.*) <(.*)>, I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)\s*$" + remediation_regex_individual = "^I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)$" + remediation_regex_thirdparty = "^On behalf of (.*) <(.*)>, I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)$" allow_remediation_commit_individual = False allow_remediation_commit_thirdparty = False diff --git a/contrib_check/org.py b/contrib_check/org.py index a085979..8f97d6d 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -36,7 +36,7 @@ def org_name(self): @org_name.setter def org_name(self, org_name): - self.__org_name = re.sub('^http(s)*://(www\.)*github.com/','',org_name) + self.__org_name = re.sub('^http(s)*://(www.)*github.com/','',org_name) @property def org_type(self): From 8dd759b746aa84249c4b0af26c58eeb9032d01ca Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sat, 4 May 2024 11:44:05 -0400 Subject: [PATCH 07/20] Delete .github/workflows/main.yml Signed-off-by: John Mertic --- .github/workflows/main.yml | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 6f5ccf3..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,33 +0,0 @@ -# This is a basic workflow to help you get started with Actions - -name: CI - -# Controls when the action will run. Triggers the workflow on push or pull request -# events but only for the master branch -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # This workflow contains a single job called "build" - build: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Run tests - run: | - python tests.py -b From ff2c161fb652e9c40ca65d075bde5639aba24b18 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sat, 4 May 2024 11:45:19 -0400 Subject: [PATCH 08/20] Fix error caught by CodeQL Signed-off-by: John Mertic --- contrib_check/org.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib_check/org.py b/contrib_check/org.py index 8f97d6d..a085979 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -36,7 +36,7 @@ def org_name(self): @org_name.setter def org_name(self, org_name): - self.__org_name = re.sub('^http(s)*://(www.)*github.com/','',org_name) + self.__org_name = re.sub('^http(s)*://(www\.)*github.com/','',org_name) @property def org_type(self): From 9f0ff50006d637ff189949a57f9157a6cf04cef6 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:10:37 -0400 Subject: [PATCH 09/20] Fix failing tests Signed-off-by: John Mertic --- contrib_check/org.py | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib_check/org.py b/contrib_check/org.py index a085979..e4d1dd7 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -36,7 +36,7 @@ def org_name(self): @org_name.setter def org_name(self, org_name): - self.__org_name = re.sub('^http(s)*://(www\.)*github.com/','',org_name) + self.__org_name = re.sub(r'^http(s)*://(www\.)*github.com/','',org_name) @property def org_type(self): diff --git a/pyproject.toml b/pyproject.toml index 392924f..1cb0d87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ python = "^3.12" PyYAML = ">=6.0.1" GitPython = ">=3.1.50" PyGithub = ">=2.4.0" +alive-progress = "^3.3.0" [tool.poetry.group.dev.dependencies] coverage = ">=7.14.0" From 062c27a399ecfd08f8bde11e069b5c700ae1fa5f Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:15:03 -0400 Subject: [PATCH 10/20] Don't cache packages Signed-off-by: John Mertic --- .github/workflows/test.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 645bbee..4a961fe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,15 +42,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.0 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' run: poetry install --no-interaction - name: Run tests From c317bfbce7cbf4d428b8a3153368bef0cf15a82d Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:27:09 -0400 Subject: [PATCH 11/20] Fix Sonarcloud warnings Signed-off-by: John Mertic --- contrib_check/commit.py | 45 ++++----- contrib_check/org.py | 16 ++-- contrib_check/repo.py | 67 +++++++------- tests.py | 196 +++++++++++----------------------------- 4 files changed, 117 insertions(+), 207 deletions(-) diff --git a/contrib_check/commit.py b/contrib_check/commit.py index 10e5dd7..79beedf 100644 --- a/contrib_check/commit.py +++ b/contrib_check/commit.py @@ -14,14 +14,14 @@ import git class Commit(): + # GitPython.Commit object git_commit_object = None - repo_object = None is_merge_commit = False create_prior_commits_dir = 'dco-signoffs' - + remediation_regex_individual = "^I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)$" remediation_regex_thirdparty = "^On behalf of (.*) <(.*)>, I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)$" @@ -34,17 +34,18 @@ def __init__(self, git_commit_object, repo_object): self.git_commit_object = git_commit_object self.repo_object = repo_object self.is_merge_commit = len(git_commit_object.parents) > 1 - self.loadRemediationCommitConfig() + + self.load_remediation_commit_config() def checkDCOSignoff(self): if self.isDCOSignOffRequired(): - return self.hasDCOSignOff() or self.hasDCOPastSignoff() or self.hasRemediation() - + return self.hasDCOSignOff() or self.hasDCOPastSignoff() or self.has_remediation() + return True def isDCOSignOffRequired(self): return not self.is_merge_commit - + def hasDCOSignOff(self): return (re.search("Signed-off-by: (.+)",self.git_commit_object.message) != None) @@ -55,34 +56,34 @@ def hasDCOPastSignoff(self): return False - def hasRemediation(self): + def has_remediation(self): return self.repo_object.git_repo_object.git.rev_parse(self.git_commit_object.hexsha, short="7") in self.remediations - - def loadRemediationCommitConfig(self): + + def load_remediation_commit_config(self): try: - with open(self.repo_object.git_repo_object.head.commit.tree[".github/dco.yml"].abspath, 'r') as file: + with open(self.repo_object.git_repo_object.head.commit.tree[".github/dco.yml"].abspath, 'r') as file: config = yaml.safe_load(file) - self.allow_remediation_commit_individual = config['allowRemediationCommits']['individual'] if config and 'allowRemediationCommits' in config and 'individual' in config['allowRemediationCommits'] else False - self.allow_remediation_commit_thirdparty = config['allowRemediationCommits']['thirdParty'] if config and 'allowRemediationCommits' in config and 'thirdParty' in config['allowRemediationCommits'] else False - except KeyError as e: + self.allow_remediation_commit_individual = config['allowRemediationCommits']['individual'] if config and 'allowRemediationCommits' in config and 'individual' in config['allowRemediationCommits'] else False + self.allow_remediation_commit_thirdparty = config['allowRemediationCommits']['thirdParty'] if config and 'allowRemediationCommits' in config and 'thirdParty' in config['allowRemediationCommits'] else False + except KeyError: return False else: return True - def isRemediationCommit(self): - isremediationcommit = False + def is_remediation_commit(self): + is_remediation_commit = False if self.allow_remediation_commit_individual: - for match in re.findall(self.remediation_regex_individual,self.git_commit_object.message,flags=re.I|re.M): - # ensure it's a valid remediation commit by matching the author with the attestaion + for match in re.findall(self.remediation_regex_individual,self.git_commit_object.message,flags=re.I|re.M): + # ensure it's a valid remediation commit by matching the author with the attestation if ( match[0] == self.git_commit_object.author.name ) and ( match[1] == self.git_commit_object.author.email ): self.remediations.append(match[2]) - isremediationcommit = True + is_remediation_commit = True if self.allow_remediation_commit_thirdparty: for match in re.findall(self.remediation_regex_thirdparty,self.git_commit_object.message,flags=re.I|re.M): - # ensure it's a valid remediation commit by matching the author with the attestaion + # ensure it's a valid remediation commit by matching the author with the attestation if ( match[2] == self.git_commit_object.author.name ) and ( match[3] == self.git_commit_object.author.email ): self.remediations.append(match[4]) - isremediationcommit = True - - return isremediationcommit + is_remediation_commit = True + + return is_remediation_commit diff --git a/contrib_check/org.py b/contrib_check/org.py index e4d1dd7..d70b2e8 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -10,9 +10,10 @@ import re from github import Github, GithubException, RateLimitExceededException -from .repo import Repo +from .repo import Repo class Org(): + __org_name = '' __org_type = 'github' ignore_repos = [] @@ -21,7 +22,7 @@ class Org(): lazy_load = False repos = [] - def __init__(self,org_name,org_type='github',ignore_repos=[],only_repos=[],skip_archived=True,load_repos=True): + def __init__(self, org_name, org_type='github', ignore_repos=[], only_repos=[], skip_archived=True, load_repos=True): self.org_type = org_type self.org_name = org_name self.ignore_repos = ignore_repos @@ -36,17 +37,16 @@ def org_name(self): @org_name.setter def org_name(self, org_name): - self.__org_name = re.sub(r'^http(s)*://(www\.)*github.com/','',org_name) + self.__org_name = re.sub(r'^http(s)*://(www\.)*github.com/', '', org_name) @property def org_type(self): return self.__org_type @org_type.setter - def org_type(self,org_type): + def org_type(self, org_type): if org_type == 'github' and 'GITHUB_TOKEN' not in os.environ: raise Exception('Github token is not defined. Set GITHUB_TOKEN environment variable to a valid Github token') - self.__org_type = org_type def reloadRepos(self): @@ -54,7 +54,7 @@ def reloadRepos(self): if self.org_type == 'github': try: - gh_repos = self._getGithubReposForOrg() + gh_repos = self._get_github_repos_for_org() for gh_repo in gh_repos: if self.ignore_repos and gh_repo.name in self.ignore_repos: continue @@ -75,7 +75,7 @@ def reloadRepos(self): print("Server error - retrying...") return self.repos - - def _getGithubReposForOrg(self): + + def _get_github_repos_for_org(self): g = Github(login_or_token=os.environ['GITHUB_TOKEN'], per_page=1000) return g.get_organization(self.org_name).get_repos() diff --git a/contrib_check/repo.py b/contrib_check/repo.py index e539324..a10d7da 100644 --- a/contrib_check/repo.py +++ b/contrib_check/repo.py @@ -22,20 +22,21 @@ from .commit import Commit class Repo(): + name = '' html_url = '' past_signoffs = [] remediations = [] git_repo_object = None - prior_commits_dir = 'dco-signoffs' + prior_commits_dir = 'dco-signoffs' remediation_commits_dir = 'remediation-commits' checks = { 'dco': True } - + error_types = { - 'dco': 'The commit did not have a DCO Signoff' + 'dco': 'The commit did not have a DCO Signoff' } __csv_filename = 'output.csv' @@ -44,9 +45,10 @@ class Repo(): __csvfileref = None def __init__(self, repo_path): + # Skip LFS files - we don't need to download them os.environ["GIT_LFS_SKIP_SMUDGE"]="1" - + # if GitHub, we can find what we need url_search = re.search("https://github.com/(.*)/(.*)",repo_path) if url_search: @@ -56,12 +58,12 @@ def __init__(self, repo_path): print("Cloning repo %s" %self.html_url) self.git_repo_object = git.Repo.clone_from(self.html_url,self.__fo.name,progress=GitRemoteProgress()) self.csv_filename = url_search.group(1)+'-'+self.name+'.csv' - # local clone + # local clone elif os.path.isdir(repo_path): self.name = os.path.basename(os.path.realpath(repo_path)) self.git_repo_object = git.Repo(repo_path) self.csv_filename = self.name+'.csv' - self.loadRemediationCommits() + self.load_remediation_commits() def loadPastSignoffs(self, dco_signoffs_directories = ["dco-signoffs"]): try: @@ -75,33 +77,33 @@ def loadPastSignoffs(self, dco_signoffs_directories = ["dco-signoffs"]): print("...invalid or empty repo - skipping") return False - def loadRemediationCommits(self): + def load_remediation_commits(self): for commit in self.git_repo_object.iter_commits(): - commitObj = Commit(commit,self) - if commitObj.isRemediationCommit(): - self.remediations.extend(commitObj.remediations) + commit_obj = Commit(commit, self) + if commit_obj.is_remediation_commit(): + self.remediations.extend(commit_obj.remediations) def scan(self): for commit in self.git_repo_object.iter_commits(): - commitObj = Commit(commit,self) - if 'dco' in self.checks and not commitObj.checkDCOSignoff(): - self.writeError(commitObj,'dco') + commit_obj = Commit(commit, self) + if 'dco' in self.checks and not commit_obj.checkDCOSignoff(): + self.write_error(commit_obj, 'dco') @property def csv_filename(self): return self.__csv_filename @csv_filename.setter - def csv_filename(self,csvfile): + def csv_filename(self, csvfile): self.__csv_filename = csvfile - + def __del__(self): if self.__fo: self.__fo.cleanup() if self.__csvfileref: - self.__csvfileref.close() + self.__csvfileref.close() - def writeError(self, commit, error_type): + def write_error(self, commit, error_type): if not self.__csvfileref or not self.__csv_writer: # remove file if there already if os.path.isfile(self.__csv_filename): @@ -119,44 +121,46 @@ def writeError(self, commit, error_type): commit.git_commit_object.authored_datetime, error_type, self.error_types[error_type] - ]) - + ]) + if error_type == 'dco': - self.writeDCOPriorCommitsFile(commit) + self.write_dco_prior_commits_file(commit) - def writeIndividualRemediationCommit(self, commit): + def write_individual_remediation_commit(self, commit): if not os.path.exists(self.remediation_commits_dir): os.mkdir(self.remediation_commits_dir) - - remediationfilename = "{}/{}-{}.txt".format(self.remediation_commits_dir,self.name,commit.git_commit.author.name) - shorthash = self.git_repo_object.git.rev_parse(commit.git_commit_object.hexsha, short="7") + + remediationfilename = "{}/{}-{}.txt".format(self.remediation_commits_dir, self.name, commit.git_commit_object.author.name) + short_hash = self.git_repo_object.git.rev_parse(commit.git_commit_object.hexsha, short="7") if not os.path.isfile(remediationfilename): - fh = open(remediationfilename, mode='w+') - fh.write("DCO Remediation Commit for {} <{}>\n\n".format(commit.git_commit_object.author.name,commit.git_commit_object.author.email)) + fh = open(remediationfilename, mode='w+') + fh.write("DCO Remediation Commit for {} <{}>\n\n".format(commit.git_commit_object.author.name, commit.git_commit_object.author.email)) else: - fh = open(remediationfilename, mode='a') + fh = open(remediationfilename, mode='a') - fh.write("I, {} <{}>, hereby add my Signed-off-by to this commit: {}\n".format(commit.git_commit_object.author.name,commit.git_commit_object.author.email,self.git_repo_object.git.rev_parse(commit.git_commit_object.hexsha, short="7"))) + fh.write("I, {} <{}>, hereby add my Signed-off-by to this commit: {}\n".format(commit.git_commit_object.author.name, commit.git_commit_object.author.email, short_hash)) fh.close() - def writeDCOPriorCommitsFile(self, commit): + def write_dco_prior_commits_file(self, commit): if not os.path.exists(self.prior_commits_dir): os.mkdir(self.prior_commits_dir) + if not os.path.exists(self.prior_commits_dir+'/'+self.name): os.mkdir(self.prior_commits_dir+'/'+self.name) commitfilename = self.prior_commits_dir+'/'+self.name+'/'+commit.git_commit_object.author.name+'-'+self.name+'.txt' if not os.path.isfile(commitfilename): - fh = open(commitfilename, mode='w+') + fh = open(commitfilename, mode='w+') fh.write("I, "+commit.git_commit_object.author.name+" hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: "+commit.git_commit_object.author.email+"\n\n") else: - fh = open(commitfilename, mode='a') + fh = open(commitfilename, mode='a') fh.write(commit.git_commit_object.hexsha+" "+commit.git_commit_object.message+"\n") fh.close() + class GitRemoteProgress(git.RemoteProgress): OP_CODES = [ "BEGIN", @@ -214,4 +218,3 @@ def _dispatch_bar(self, title: str | None = "") -> None: def _destroy_bar(self) -> None: """Destroy an existing progress bar""" self.alive_bar_instance.__exit__(None, None, None) - diff --git a/tests.py b/tests.py index 7c92d76..dd29064 100755 --- a/tests.py +++ b/tests.py @@ -7,166 +7,72 @@ import os import git - import unittest -from unittest import mock from unittest.mock import Mock -from unittest.mock import patch from contrib_check.org import Org from contrib_check.repo import Repo from contrib_check.commit import Commit - class TestCommit(unittest.TestCase): @classmethod def setUpClass(self): - self._mock_repo = Mock() - self._mock_repo.heads = Mock() - self._mock_repo.git_repo_object.head = Mock() - self._mock_repo.git_repo_object.head.commit = Mock() - data = [{"name": ".github/dco.yml"}] - self._mock_repo.git_repo_object.head.commit.tree = {x['name']: x for x in data} - self._mock_repo.git_repo_object.head.commit.tree[".github/dco.yml"] = Mock() - self._mock_repo.git_repo_object.head.commit.tree[".github/dco.yml"].abspath = "foo" - self._mock_commit_merge = Mock() self._mock_commit_merge.parents = [1,2,3] - self._mock_commit = Mock() self._mock_commit.parents = [1] # test for not having a signoff def testHasNoDCOSignOff(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = "has no signoff" - self.assertFalse(commit.hasDCOSignOff(), "Commit message didn't have a signoff") + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.message = "has no signoff" + self.assertFalse(commit.hasDCOSignOff(), "Commit message didn't have a signoff") # test for having a signoff def testHasDCOSignOff(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = "has a signoff Signed-off-by: John Mertic " - self.assertTrue(commit.hasDCOSignOff(), "Commit message had a signoff") + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.message = "has a signoff Signed-off-by: John Mertic " + self.assertTrue(commit.hasDCOSignOff(), "Commit message had a signoff") def testFoundPastDCOSignoff(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] - - self.assertTrue(commit.hasDCOPastSignoff(), "Commit message had a past signoff") + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + commit.repo_object.past_signoffs = [ + "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() + ] + self.assertTrue(commit.hasDCOPastSignoff(), "Commit message had a past signoff") def testFoundNoPastDCOSignoff(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = 'c1d322dfba0ed7a770d74074990ac51a9efedcd0' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] - - self.assertFalse(commit.hasDCOPastSignoff(), "Commit message had a past signoff") + commit = Commit(self._mock_commit,self._mock_repo) + commit.git_commit_object.hexsha = 'c1d322dfba0ed7a770d74074990ac51a9efedcd0' + commit.repo_object.past_signoffs = [ + "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() + ] + self.assertFalse(commit.hasDCOPastSignoff(), "Commit message had a past signoff") def testDCOSignoffRequiredMergeCommit(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit_merge,self._mock_repo) - - self.assertFalse(commit.isDCOSignOffRequired(), "Merge commits don't require a DCO signoff") + commit = Commit(self._mock_commit_merge,self._mock_repo) + self.assertFalse(commit.isDCOSignOffRequired(), "Merge commits don't require a DCO signoff") def testDCOSignoffRequiredNormalCommit(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - - self.assertTrue(commit.isDCOSignOffRequired(), "All non-merge commits require a DCO signoff") + commit = Commit(self._mock_commit,self._mock_repo) + self.assertTrue(commit.isDCOSignOffRequired(), "All non-merge commits require a DCO signoff") def testDCOSignoffCheckMergeCommit(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit_merge,self._mock_repo) - commit.git_commit_object.message = "has no signoff" - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but is a merge commit so that's ok") - - def testDCOSignoffCheckNormalCommitNoSignoffPastSignoff(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] - commit.git_commit_object.message = "has no signoff" - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a past DCO signoff so that's ok") - - def testDCOSignoffCheckNormalCommitNoSignoffHasRemediation(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.git_commit_object.message = "has no signoff" - commit.repo_object.past_signoffs = [] - commit.repo_object.git_repo_object.git.rev_parse = Mock(return_value='0e1fd4b') - commit.remediations.append('0e1fd4b') - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a remediation commit so that's ok") - - def testIsRemediationCommitIndividual(self): - dcoConfig = ''' -allowRemediationCommits: - individual: true -''' - with patch('builtins.open', mock.mock_open(read_data=dcoConfig)) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = ''' -DCO Remediation Commit for lparadkar-rocket - -I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb6 -I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb7 -''' - commit.git_commit_object.author = Mock() - commit.git_commit_object.author.name = 'lparadkar-rocket' - commit.git_commit_object.author.email = 'lparadkar@rocketsoftware.com' + commit = Commit(self._mock_commit_merge,self._mock_repo) + commit.git_commit_object.message = "has no signoff" + self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but is a merge commit so that's ok") - self.assertTrue(commit.isRemediationCommit()) - self.assertIn('41febb6',commit.remediations) - self.assertIn('41febb7',commit.remediations) - - def testIsRemediationCommitThirdParty(self): - dcoConfig = ''' -allowRemediationCommits: - thirdParty: true -''' - with patch('builtins.open', mock.mock_open(read_data=dcoConfig)) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = ''' -Third-Party DCO Remediation Commit for Billy Bob Thorton - -On behalf of Billy Bob Thorton , I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb6 -On behalf of Billy Bob Thorton , I, lparadkar-rocket , hereby add my Signed-off-by to this commit: 41febb7 -''' - commit.git_commit_object.author = Mock() - commit.git_commit_object.author.name = 'lparadkar-rocket' - commit.git_commit_object.author.email = 'lparadkar@rocketsoftware.com' - - self.assertTrue(commit.isRemediationCommit()) - self.assertIn('41febb6',commit.remediations) - self.assertIn('41febb7',commit.remediations) - - def testHasRemediation(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.repo_object.git_repo_object.git.rev_parse = Mock(return_value='1234567') - commit.remediations = ['1234567'] - - self.assertTrue(commit.hasRemediation()) - - def testHasNoRemediation(self): - with patch('builtins.open', mock.mock_open(read_data="")) as m: - commit = Commit(self._mock_commit,self._mock_repo) - commit.repo_object.git_repo_object.git.rev_parse = Mock(return_value='1234567') - commit.remediations = ['123i567'] - - self.assertFalse(commit.hasRemediation()) + def testDCOSignoffCheckNormalCommitNoSignoffPastSignoff(self): + commit = Commit(self._mock_commit_merge,self._mock_repo) + commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + commit.repo_object.past_signoffs = [ + ['dco-signoffs',"I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() ] + ] + commit.git_commit_object.message = "has no signoff" + self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a past DCO signoff so that's ok") class TestOrg(unittest.TestCase): @@ -176,18 +82,18 @@ class TestOrg(unittest.TestCase): "html_url": "https://github.com/testorg/repo1", "name":"repo1", "archived":False - }), + }), type("gh_repo",(object,),{ "html_url": "https://github.com/testorg/repo2", "name":"repo2", "archived":False - }), + }), type("gh_repo",(object,),{ "html_url": "https://github.com/testorg/repo3", "name":"repo3", "archived":True - }), - ] + }), + ] @classmethod def tearDownClass(cls): @@ -201,10 +107,8 @@ def tearDownClass(cls): @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) def testInitNoLoadRepos(self): org = Org("testorg",load_repos=False) - self.assertEqual(org.repos,[]) - @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) def testOrgTypeSetGithubNoTokenDefined(self): names_to_remove = {"GITHUB_TOKEN"} modified_environ = { @@ -216,9 +120,8 @@ def testOrgTypeSetGithubNoTokenDefined(self): @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) @unittest.mock.patch.object(git.Repo,'clone_from') def testLoadOrgRepos(self,mock_method): - with unittest.mock.patch.object(Org,'_getGithubReposForOrg',return_value=self.githubOrgRepos) as mock: + with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: org = Org("testorg") - self.assertEqual(org.repos[0].name,"repo1") self.assertEqual(org.repos[1].name,"repo2") self.assertEqual(len(org.repos),2) @@ -226,49 +129,52 @@ def testLoadOrgRepos(self,mock_method): @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) @unittest.mock.patch.object(git.Repo,'clone_from') def testLoadOrgReposIgnoreRepo(self,mock_method): - with unittest.mock.patch.object(Org,'_getGithubReposForOrg',return_value=self.githubOrgRepos) as mock: + with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: org = Org("testorg",ignore_repos=['repo1']) - self.assertEqual(org.repos[0].name,"repo2") self.assertEqual(len(org.repos),1) - + @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) @unittest.mock.patch.object(git.Repo,'clone_from') def testLoadOrgReposOnlyRepo(self,mock_method): - with unittest.mock.patch.object(Org,'_getGithubReposForOrg',return_value=self.githubOrgRepos) as mock: + with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: org = Org("testorg",only_repos=['repo1']) - self.assertEqual(org.repos[0].name,"repo1") self.assertEqual(len(org.repos),1) - + @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) @unittest.mock.patch.object(git.Repo,'clone_from') def testLoadOrgReposIncludeArchives(self,mock_method): - with unittest.mock.patch.object(Org,'_getGithubReposForOrg',return_value=self.githubOrgRepos) as mock: + with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: org = Org("testorg",skip_archived=False) - self.assertEqual(org.repos[0].name,"repo1") self.assertEqual(org.repos[1].name,"repo2") self.assertEqual(org.repos[2].name,"repo3") self.assertEqual(len(org.repos),3) + class TestRepo(unittest.TestCase): @unittest.mock.patch.object(git.Repo,'clone_from') def testInitGithub(self,mock_method): repo = Repo("https://github.com/foo/bar") - self.assertEqual(repo.name,"bar") self.assertEqual(repo.html_url,"https://github.com/foo/bar") self.assertEqual(repo.csv_filename,"foo-bar.csv") + self.assertTrue(os.path.isfile("foo-bar.csv")) + if os.path.isfile("foo-bar.csv"): + os.remove("foo-bar.csv") @unittest.mock.patch.object(git.Repo,'clone_from') def testInitLocal(self,mock_method): repo = Repo(".") - self.assertEqual(repo.name,os.path.basename(os.path.realpath("."))) self.assertEqual(repo.html_url,"") self.assertEqual(repo.csv_filename,repo.name+".csv") + self.assertTrue(os.path.isfile(repo.name+".csv")) + if os.path.isfile(repo.name+".csv"): + os.remove(repo.name+".csv") + if __name__ == '__main__': unittest.main() From 272062ea6cafd0b5eb5e7b4fc2db1ab7bc3ca220 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:29:53 -0400 Subject: [PATCH 12/20] Fix Sonarcloud warnings Signed-off-by: John Mertic --- contrib_check/repo.py | 14 ++++++-------- tests.py | 3 +++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/contrib_check/repo.py b/contrib_check/repo.py index a10d7da..429e5d2 100644 --- a/contrib_check/repo.py +++ b/contrib_check/repo.py @@ -95,6 +95,12 @@ def csv_filename(self): @csv_filename.setter def csv_filename(self, csvfile): + # remove file if there already + if os.path.isfile(csvfile): + os.remove(csvfile) + + self.__csvfileref = open(csvfile, mode='w') + self.__csv_writer = csv.writer(self.__csvfileref, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) self.__csv_filename = csvfile def __del__(self): @@ -104,14 +110,6 @@ def __del__(self): self.__csvfileref.close() def write_error(self, commit, error_type): - if not self.__csvfileref or not self.__csv_writer: - # remove file if there already - if os.path.isfile(self.__csv_filename): - os.remove(self.__csv_filename) - - self.__csvfileref = open(self.__csv_filename, mode='w') - self.__csv_writer = csv.writer(self.__csvfileref, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) - self.__csv_writer.writerow([ self.name, commit.git_commit_object.hexsha, diff --git a/tests.py b/tests.py index dd29064..f507d5e 100755 --- a/tests.py +++ b/tests.py @@ -19,6 +19,9 @@ class TestCommit(unittest.TestCase): @classmethod def setUpClass(self): self._mock_repo = Mock() + # Make tree subscript raise KeyError so load_remediation_commit_config + # takes the except KeyError path (no .github/dco.yml present) + self._mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) self._mock_commit_merge = Mock() self._mock_commit_merge.parents = [1,2,3] self._mock_commit = Mock() From a8663654c52af25af59f3371d248c55344eb52b2 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:34:32 -0400 Subject: [PATCH 13/20] Migrate over `contrib_check.py` changes to `contrib_check/main.py` Signed-off-by: John Mertic --- contrib_check.py | 72 ------------------------------------------- contrib_check/main.py | 6 ++-- 2 files changed, 3 insertions(+), 75 deletions(-) delete mode 100755 contrib_check.py diff --git a/contrib_check.py b/contrib_check.py deleted file mode 100755 index 18b66b2..0000000 --- a/contrib_check.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright this project and it's contributors -# SPDX-License-Identifier: Apache-2.0 -# -# encoding=utf8 - -import shutil -from argparse import ArgumentParser,FileType -from datetime import datetime - -# third party modules -import yaml - -from contrib_check.repo import Repo -from contrib_check.org import Org - -def main(): - - startTime = datetime.now() - - parser = ArgumentParser(description="Scan a single repo or organization for various contribution checks ( such as DCO )") - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument("-c", "--config", dest="configfile", type=FileType('r'), help="name of YAML config file") - group.add_argument("--repo", dest="repo", help="URL or path to the repo to search") - group.add_argument("--org", dest="org", help="URL to GitHub org to search") - parser.add_argument("--dco", dest="dco", help="Perform a DCO check (defaults to true)", default=True) - args = parser.parse_args() - - config = {} - if args.configfile: - config = yaml.safe_load(args.configfile) - if 'dco' in config and 'prior_commits' in config['dco'] and 'directory' in config['dco']['prior_commits']: - dco_prior_commits_directory = config['dco']['prior_commits']['directory'] - shutil.rmtree(dco_prior_commits_directory,1) - if 'repo' in config: - args.repo = config['repo'] - elif 'org' in config: - args.org = config['org']['name'] - - repos = [] - if args.org: - orgObj = Org(args.org,load_repos=False) - if 'type' in config['org']: - orgObj.org_type=config['org']['type'] - if 'ignore_repos' in config['org']: - orgObj.ignore_repos = config['org']['ignore_repos'] - if 'only_repos' in config['org']: - orgObj.only_repos=config['org']['only_repos'] - if 'skip_archived' in config['org']: - orgObj.skip_archived=config['org']['skip_archived'] - repos = orgObj.reloadRepos() - - if args.repo: - repos = [Repo(args.repo)] - - for repoObj in repos: - print("Searching repo {}...".format(repoObj.name)) - if 'dco' in config or args.dco: - if 'dco' in config and 'prior_commits_directory' in config['dco']: - repoObj.prior_commits_dir = config['dco']['prior_commits_directory'] - if 'dco' in config and 'signoff_dirs' in config['dco']: - repoObj.loadPastSignoffs(config['dco']['signoff_dirs']) - else: - repoObj.loadPastSignoffs() - repoObj.scan() - print(repoObj.remediations) - - print("This took "+str(datetime.now() - startTime)) - -if __name__ == '__main__': - main() diff --git a/contrib_check/main.py b/contrib_check/main.py index 6870202..db17222 100644 --- a/contrib_check/main.py +++ b/contrib_check/main.py @@ -38,11 +38,11 @@ def main(): if 'repo' in config: args.repo = config['repo'] elif 'org' in config: - args.org = config['org'] + args.org = config['org']['name'] repos = [] if args.org: - orgObj = Org(args.org) + orgObj = Org(args.org, load_repos=False) if 'type' in config['org']: orgObj.org_type = config['org']['type'] if 'ignore_repos' in config['org']: @@ -51,7 +51,7 @@ def main(): orgObj.only_repos = config['org']['only_repos'] if 'skip_archived' in config['org']: orgObj.skip_archived = config['org']['skip_archived'] - repos = orgObj.loadRepos() + repos = orgObj.reloadRepos() if args.repo: repos = [Repo(args.repo)] From 834b21950ad895cffefa4548759e8950907ef1a5 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:50:26 -0400 Subject: [PATCH 14/20] Add test coverage' Signed-off-by: John Mertic --- contrib_check/org.py | 3 +- pyproject.toml | 6 +- tests.py | 183 ------------------------ tests/__init__.py | 0 tests/test_commit.py | 250 ++++++++++++++++++++++++++++++++ tests/test_org.py | 134 +++++++++++++++++ tests/test_repo.py | 333 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 723 insertions(+), 186 deletions(-) delete mode 100755 tests.py create mode 100644 tests/__init__.py create mode 100644 tests/test_commit.py create mode 100644 tests/test_org.py create mode 100644 tests/test_repo.py diff --git a/contrib_check/org.py b/contrib_check/org.py index d70b2e8..c1cf9ec 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -8,6 +8,7 @@ import os import socket import re +import time from github import Github, GithubException, RateLimitExceededException from .repo import Repo @@ -65,7 +66,7 @@ def reloadRepos(self): self.repos.append(Repo(gh_repo.html_url)) except RateLimitExceededException: print("Sleeping until we get past the API rate limit....") - time.sleep(g.rate_limiting_resettime-now()) + time.sleep(60) except GithubException as e: if e.status == 502: print("Server error - retrying...") diff --git a/pyproject.toml b/pyproject.toml index 1cb0d87..1b31915 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,16 +13,18 @@ packages = [{ include = "contrib_check" }] contrib-check = "contrib_check.main:main" [tool.poetry.dependencies] -python = "^3.12" +python = "^3.9" PyYAML = ">=6.0.1" GitPython = ">=3.1.50" PyGithub = ">=2.4.0" -alive-progress = "^3.3.0" [tool.poetry.group.dev.dependencies] coverage = ">=7.14.0" pytest = ">=8.0.0" +[tool.pytest.ini_options] +testpaths = ["tests"] + [tool.coverage.run] source = ["contrib_check"] diff --git a/tests.py b/tests.py deleted file mode 100755 index f507d5e..0000000 --- a/tests.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright this project and it's contributors -# SPDX-License-Identifier: Apache-2.0 -# -# encoding=utf8 - -import os -import git -import unittest -from unittest.mock import Mock - -from contrib_check.org import Org -from contrib_check.repo import Repo -from contrib_check.commit import Commit - -class TestCommit(unittest.TestCase): - - @classmethod - def setUpClass(self): - self._mock_repo = Mock() - # Make tree subscript raise KeyError so load_remediation_commit_config - # takes the except KeyError path (no .github/dco.yml present) - self._mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) - self._mock_commit_merge = Mock() - self._mock_commit_merge.parents = [1,2,3] - self._mock_commit = Mock() - self._mock_commit.parents = [1] - - # test for not having a signoff - def testHasNoDCOSignOff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = "has no signoff" - self.assertFalse(commit.hasDCOSignOff(), "Commit message didn't have a signoff") - - # test for having a signoff - def testHasDCOSignOff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.message = "has a signoff Signed-off-by: John Mertic " - self.assertTrue(commit.hasDCOSignOff(), "Commit message had a signoff") - - def testFoundPastDCOSignoff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] - self.assertTrue(commit.hasDCOPastSignoff(), "Commit message had a past signoff") - - def testFoundNoPastDCOSignoff(self): - commit = Commit(self._mock_commit,self._mock_repo) - commit.git_commit_object.hexsha = 'c1d322dfba0ed7a770d74074990ac51a9efedcd0' - commit.repo_object.past_signoffs = [ - "I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() - ] - self.assertFalse(commit.hasDCOPastSignoff(), "Commit message had a past signoff") - - def testDCOSignoffRequiredMergeCommit(self): - commit = Commit(self._mock_commit_merge,self._mock_repo) - self.assertFalse(commit.isDCOSignOffRequired(), "Merge commits don't require a DCO signoff") - - def testDCOSignoffRequiredNormalCommit(self): - commit = Commit(self._mock_commit,self._mock_repo) - self.assertTrue(commit.isDCOSignOffRequired(), "All non-merge commits require a DCO signoff") - - def testDCOSignoffCheckMergeCommit(self): - commit = Commit(self._mock_commit_merge,self._mock_repo) - commit.git_commit_object.message = "has no signoff" - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but is a merge commit so that's ok") - - def testDCOSignoffCheckNormalCommitNoSignoffPastSignoff(self): - commit = Commit(self._mock_commit_merge,self._mock_repo) - commit.git_commit_object.hexsha = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' - commit.repo_object.past_signoffs = [ - ['dco-signoffs',"I, personname hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: [personname@domain.com]\n\n11ac960e1070eacc2fe92ac9a3d1753400e1fd4b This is a commit".encode() ] - ] - commit.git_commit_object.message = "has no signoff" - self.assertTrue(commit.checkDCOSignoff(), "Commit message didn't have a signoff, but it has a past DCO signoff so that's ok") - - -class TestOrg(unittest.TestCase): - - githubOrgRepos = [ - type("gh_repo",(object,),{ - "html_url": "https://github.com/testorg/repo1", - "name":"repo1", - "archived":False - }), - type("gh_repo",(object,),{ - "html_url": "https://github.com/testorg/repo2", - "name":"repo2", - "archived":False - }), - type("gh_repo",(object,),{ - "html_url": "https://github.com/testorg/repo3", - "name":"repo3", - "archived":True - }), - ] - - @classmethod - def tearDownClass(cls): - if os.path.exists("testorg-repo1.csv"): - os.remove("testorg-repo1.csv") - if os.path.exists("testorg-repo2.csv"): - os.remove("testorg-repo2.csv") - if os.path.exists("testorg-repo3.csv"): - os.remove("testorg-repo3.csv") - - @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) - def testInitNoLoadRepos(self): - org = Org("testorg",load_repos=False) - self.assertEqual(org.repos,[]) - - def testOrgTypeSetGithubNoTokenDefined(self): - names_to_remove = {"GITHUB_TOKEN"} - modified_environ = { - k: v for k, v in os.environ.items() if k not in names_to_remove - } - with unittest.mock.patch.dict(os.environ, modified_environ, clear=True): - self.assertRaisesRegex(Exception,'Github token',Org,'foo') - - @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) - @unittest.mock.patch.object(git.Repo,'clone_from') - def testLoadOrgRepos(self,mock_method): - with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: - org = Org("testorg") - self.assertEqual(org.repos[0].name,"repo1") - self.assertEqual(org.repos[1].name,"repo2") - self.assertEqual(len(org.repos),2) - - @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) - @unittest.mock.patch.object(git.Repo,'clone_from') - def testLoadOrgReposIgnoreRepo(self,mock_method): - with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: - org = Org("testorg",ignore_repos=['repo1']) - self.assertEqual(org.repos[0].name,"repo2") - self.assertEqual(len(org.repos),1) - - @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) - @unittest.mock.patch.object(git.Repo,'clone_from') - def testLoadOrgReposOnlyRepo(self,mock_method): - with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: - org = Org("testorg",only_repos=['repo1']) - self.assertEqual(org.repos[0].name,"repo1") - self.assertEqual(len(org.repos),1) - - @unittest.mock.patch.dict(os.environ,{'GITHUB_TOKEN':'test123'}) - @unittest.mock.patch.object(git.Repo,'clone_from') - def testLoadOrgReposIncludeArchives(self,mock_method): - with unittest.mock.patch.object(Org,'_get_github_repos_for_org',return_value=self.githubOrgRepos) as mock: - org = Org("testorg",skip_archived=False) - self.assertEqual(org.repos[0].name,"repo1") - self.assertEqual(org.repos[1].name,"repo2") - self.assertEqual(org.repos[2].name,"repo3") - self.assertEqual(len(org.repos),3) - - -class TestRepo(unittest.TestCase): - - @unittest.mock.patch.object(git.Repo,'clone_from') - def testInitGithub(self,mock_method): - repo = Repo("https://github.com/foo/bar") - self.assertEqual(repo.name,"bar") - self.assertEqual(repo.html_url,"https://github.com/foo/bar") - self.assertEqual(repo.csv_filename,"foo-bar.csv") - self.assertTrue(os.path.isfile("foo-bar.csv")) - if os.path.isfile("foo-bar.csv"): - os.remove("foo-bar.csv") - - @unittest.mock.patch.object(git.Repo,'clone_from') - def testInitLocal(self,mock_method): - repo = Repo(".") - self.assertEqual(repo.name,os.path.basename(os.path.realpath("."))) - self.assertEqual(repo.html_url,"") - self.assertEqual(repo.csv_filename,repo.name+".csv") - self.assertTrue(os.path.isfile(repo.name+".csv")) - if os.path.isfile(repo.name+".csv"): - os.remove(repo.name+".csv") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_commit.py b/tests/test_commit.py new file mode 100644 index 0000000..31c9953 --- /dev/null +++ b/tests/test_commit.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +# +# Copyright this project and it's contributors +# SPDX-License-Identifier: Apache-2.0 +# +# encoding=utf8 + +import os +import tempfile +import unittest +from unittest.mock import Mock, mock_open, patch + +from contrib_check.commit import Commit + + +def _make_mock_repo(tree_raises_key_error=True): + """Return a mock repo whose tree[] raises KeyError by default (no dco.yml).""" + mock_repo = Mock() + if tree_raises_key_error: + mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + return mock_repo + + +def _make_commit(parents=None, mock_repo=None): + if mock_repo is None: + mock_repo = _make_mock_repo() + mock_git_commit = Mock() + mock_git_commit.parents = parents if parents is not None else [1] + return Commit(mock_git_commit, mock_repo) + + +class TestCommitDCOSignOff(unittest.TestCase): + + def test_has_no_dco_signoff(self): + commit = _make_commit() + commit.git_commit_object.message = "has no signoff" + self.assertFalse(commit.hasDCOSignOff()) + + def test_has_dco_signoff(self): + commit = _make_commit() + commit.git_commit_object.message = "fix: thing Signed-off-by: John Mertic " + self.assertTrue(commit.hasDCOSignOff()) + + +class TestCommitDCOPastSignoff(unittest.TestCase): + + SHA = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + OTHER_SHA = 'c1d322dfba0ed7a770d74074990ac51a9efedcd0' + SIGNOFF_BLOB = ( + "I, personname hereby sign-off-by all of my past commits to this repo " + "subject to the Developer Certificate of Origin (DCO), Version 1.1. " + "In the past I have used emails: [personname@domain.com]\n\n" + f"{SHA} This is a commit" + ).encode() + + def test_found_past_signoff(self): + commit = _make_commit() + commit.git_commit_object.hexsha = self.SHA + commit.repo_object.past_signoffs = [self.SIGNOFF_BLOB] + self.assertTrue(commit.hasDCOPastSignoff()) + + def test_no_past_signoff(self): + commit = _make_commit() + commit.git_commit_object.hexsha = self.OTHER_SHA + commit.repo_object.past_signoffs = [self.SIGNOFF_BLOB] + self.assertFalse(commit.hasDCOPastSignoff()) + + def test_empty_past_signoffs(self): + commit = _make_commit() + commit.git_commit_object.hexsha = self.SHA + commit.repo_object.past_signoffs = [] + self.assertFalse(commit.hasDCOPastSignoff()) + + +class TestCommitDCOSignOffRequired(unittest.TestCase): + + def test_not_required_for_merge_commit(self): + commit = _make_commit(parents=[1, 2, 3]) + self.assertFalse(commit.isDCOSignOffRequired()) + + def test_required_for_normal_commit(self): + commit = _make_commit(parents=[1]) + self.assertTrue(commit.isDCOSignOffRequired()) + + +class TestCommitCheckDCOSignoff(unittest.TestCase): + + SHA = '11ac960e1070eacc2fe92ac9a3d1753400e1fd4b' + SIGNOFF_BLOB = ( + "I, personname hereby sign-off-by all of my past commits to this repo " + "subject to the Developer Certificate of Origin (DCO), Version 1.1. " + "In the past I have used emails: [personname@domain.com]\n\n" + f"{SHA} This is a commit" + ).encode() + + def test_merge_commit_always_passes(self): + commit = _make_commit(parents=[1, 2]) + commit.git_commit_object.message = "no signoff here" + self.assertTrue(commit.checkDCOSignoff()) + + def test_normal_commit_with_signoff_passes(self): + commit = _make_commit(parents=[1]) + commit.git_commit_object.message = "fix: thing Signed-off-by: Jane " + self.assertTrue(commit.checkDCOSignoff()) + + def test_normal_commit_without_signoff_fails(self): + commit = _make_commit(parents=[1]) + commit.git_commit_object.message = "no signoff" + commit.repo_object.past_signoffs = [] + commit.repo_object.git_repo_object.git.rev_parse.return_value = "abc1234" + commit.remediations = [] + self.assertFalse(commit.checkDCOSignoff()) + + def test_normal_commit_with_past_signoff_passes(self): + commit = _make_commit(parents=[1]) + commit.git_commit_object.message = "no signoff" + commit.git_commit_object.hexsha = self.SHA + commit.repo_object.past_signoffs = [self.SIGNOFF_BLOB] + self.assertTrue(commit.checkDCOSignoff()) + + def test_normal_commit_with_remediation_passes(self): + commit = _make_commit(parents=[1]) + commit.git_commit_object.message = "no signoff" + commit.repo_object.past_signoffs = [] + short = "abc1234" + commit.repo_object.git_repo_object.git.rev_parse.return_value = short + commit.remediations = [short] + self.assertTrue(commit.checkDCOSignoff()) + + +class TestCommitHasRemediation(unittest.TestCase): + + def test_has_remediation_match(self): + commit = _make_commit() + short = "abc1234" + commit.repo_object.git_repo_object.git.rev_parse.return_value = short + commit.remediations = [short] + self.assertTrue(commit.has_remediation()) + + def test_has_remediation_no_match(self): + commit = _make_commit() + commit.repo_object.git_repo_object.git.rev_parse.return_value = "abc1234" + commit.remediations = ["zzzzzzz"] + self.assertFalse(commit.has_remediation()) + + +class TestCommitLoadRemediationCommitConfig(unittest.TestCase): + + def test_no_dco_yml_returns_false(self): + """KeyError from tree lookup → returns False, flags stay False.""" + commit = _make_commit() + self.assertFalse(commit.allow_remediation_commit_individual) + self.assertFalse(commit.allow_remediation_commit_thirdparty) + + def test_dco_yml_enables_individual(self): + mock_repo = _make_mock_repo(tree_raises_key_error=False) + dco_yml = "allowRemediationCommits:\n individual: true\n thirdParty: false\n" + mock_blob = Mock() + mock_blob.abspath = "/fake/dco.yml" + mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(return_value=mock_blob) + with patch("builtins.open", mock_open(read_data=dco_yml)): + commit = _make_commit(mock_repo=mock_repo) + self.assertTrue(commit.allow_remediation_commit_individual) + self.assertFalse(commit.allow_remediation_commit_thirdparty) + + def test_dco_yml_enables_thirdparty(self): + mock_repo = _make_mock_repo(tree_raises_key_error=False) + dco_yml = "allowRemediationCommits:\n individual: false\n thirdParty: true\n" + mock_blob = Mock() + mock_blob.abspath = "/fake/dco.yml" + mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(return_value=mock_blob) + with patch("builtins.open", mock_open(read_data=dco_yml)): + commit = _make_commit(mock_repo=mock_repo) + self.assertFalse(commit.allow_remediation_commit_individual) + self.assertTrue(commit.allow_remediation_commit_thirdparty) + + def test_empty_dco_yml_leaves_flags_false(self): + mock_repo = _make_mock_repo(tree_raises_key_error=False) + mock_blob = Mock() + mock_blob.abspath = "/fake/dco.yml" + mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(return_value=mock_blob) + with patch("builtins.open", mock_open(read_data="")): + commit = _make_commit(mock_repo=mock_repo) + self.assertFalse(commit.allow_remediation_commit_individual) + self.assertFalse(commit.allow_remediation_commit_thirdparty) + + +class TestCommitIsRemediationCommit(unittest.TestCase): + + def _commit_with_config(self, individual=False, thirdparty=False): + mock_repo = _make_mock_repo(tree_raises_key_error=False) + flags = {} + if individual: + flags['individual'] = True + if thirdparty: + flags['thirdParty'] = True + dco_yml = f"allowRemediationCommits:\n individual: {str(individual).lower()}\n thirdParty: {str(thirdparty).lower()}\n" + mock_blob = Mock() + mock_blob.abspath = "/fake/dco.yml" + mock_repo.git_repo_object.head.commit.tree.__getitem__ = Mock(return_value=mock_blob) + with patch("builtins.open", mock_open(read_data=dco_yml)): + commit = _make_commit(mock_repo=mock_repo) + return commit + + def test_no_config_not_remediation(self): + commit = _make_commit() + commit.git_commit_object.message = "I, Jane , hereby add my Signed-off-by to this commit: abc1234" + commit.git_commit_object.author.name = "Jane" + commit.git_commit_object.author.email = "jane@example.com" + self.assertFalse(commit.is_remediation_commit()) + + def test_individual_remediation_valid_author(self): + commit = self._commit_with_config(individual=True) + commit.git_commit_object.message = "I, Jane Doe , hereby add my Signed-off-by to this commit: abc1234" + commit.git_commit_object.author.name = "Jane Doe" + commit.git_commit_object.author.email = "jane@example.com" + self.assertTrue(commit.is_remediation_commit()) + self.assertIn("abc1234", commit.remediations) + + def test_individual_remediation_wrong_author(self): + commit = self._commit_with_config(individual=True) + commit.git_commit_object.message = "I, Jane Doe , hereby add my Signed-off-by to this commit: abc1234" + commit.git_commit_object.author.name = "Not Jane" + commit.git_commit_object.author.email = "other@example.com" + self.assertFalse(commit.is_remediation_commit()) + + def test_thirdparty_remediation_valid_author(self): + commit = self._commit_with_config(thirdparty=True) + commit.git_commit_object.message = ( + "On behalf of ACME , I, Bob Smith , " + "hereby add my Signed-off-by to this commit: def5678" + ) + commit.git_commit_object.author.name = "Bob Smith" + commit.git_commit_object.author.email = "bob@acme.com" + self.assertTrue(commit.is_remediation_commit()) + self.assertIn("def5678", commit.remediations) + + def test_thirdparty_remediation_wrong_author(self): + commit = self._commit_with_config(thirdparty=True) + commit.git_commit_object.message = ( + "On behalf of ACME , I, Bob Smith , " + "hereby add my Signed-off-by to this commit: def5678" + ) + commit.git_commit_object.author.name = "Not Bob" + commit.git_commit_object.author.email = "notbob@acme.com" + self.assertFalse(commit.is_remediation_commit()) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_org.py b/tests/test_org.py new file mode 100644 index 0000000..baae648 --- /dev/null +++ b/tests/test_org.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# +# Copyright this project and it's contributors +# SPDX-License-Identifier: Apache-2.0 +# +# encoding=utf8 + +import os +import unittest +from unittest.mock import Mock, patch, MagicMock +import socket + +import git +from github import GithubException, RateLimitExceededException + +from contrib_check.org import Org + + +def _make_gh_repo(name, html_url=None, archived=False): + r = Mock() + r.name = name + r.html_url = html_url or f"https://github.com/testorg/{name}" + r.archived = archived + return r + + +GH_REPOS = [ + _make_gh_repo("repo1"), + _make_gh_repo("repo2"), + _make_gh_repo("repo3", archived=True), +] + + +class TestOrgInit(unittest.TestCase): + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_init_no_load_repos(self): + org = Org("testorg", load_repos=False) + self.assertEqual(org.repos, []) + + def test_init_github_no_token_raises(self): + env = {k: v for k, v in os.environ.items() if k != 'GITHUB_TOKEN'} + with patch.dict(os.environ, env, clear=True): + with self.assertRaisesRegex(Exception, 'Github token'): + Org("testorg") + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_org_name_strips_github_url(self): + org = Org("https://github.com/myorg", load_repos=False) + self.assertEqual(org.org_name, "myorg") + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_org_name_strips_www_github_url(self): + org = Org("https://www.github.com/myorg", load_repos=False) + self.assertEqual(org.org_name, "myorg") + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_org_name_plain_string_unchanged(self): + org = Org("myorg", load_repos=False) + self.assertEqual(org.org_name, "myorg") + + +class TestOrgReloadRepos(unittest.TestCase): + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + @patch.object(git.Repo, 'clone_from') + def test_load_repos_excludes_archived_by_default(self, _clone): + with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): + org = Org("testorg") + self.assertEqual(len(org.repos), 2) + self.assertEqual(org.repos[0].name, "repo1") + self.assertEqual(org.repos[1].name, "repo2") + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + @patch.object(git.Repo, 'clone_from') + def test_load_repos_includes_archived_when_flag_off(self, _clone): + with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): + org = Org("testorg", skip_archived=False) + self.assertEqual(len(org.repos), 3) + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + @patch.object(git.Repo, 'clone_from') + def test_ignore_repos(self, _clone): + with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): + org = Org("testorg", ignore_repos=['repo1']) + self.assertEqual(len(org.repos), 1) + self.assertEqual(org.repos[0].name, "repo2") + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + @patch.object(git.Repo, 'clone_from') + def test_only_repos(self, _clone): + with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): + org = Org("testorg", only_repos=['repo1']) + self.assertEqual(len(org.repos), 1) + self.assertEqual(org.repos[0].name, "repo1") + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + @patch('contrib_check.org.time.sleep') + def test_rate_limit_exception_prints_message(self, mock_sleep): + with patch.object(Org, '_get_github_repos_for_org', side_effect=RateLimitExceededException(403, "rate limit", {})): + # should not raise; prints a message and returns empty repos + org = Org("testorg") + self.assertEqual(org.repos, []) + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_github_exception_502_prints_retry(self): + exc = GithubException(502, "bad gateway", {}) + with patch.object(Org, '_get_github_repos_for_org', side_effect=exc): + org = Org("testorg") + self.assertEqual(org.repos, []) + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_github_exception_other_prints_data(self): + exc = GithubException(404, "not found", {}) + with patch.object(Org, '_get_github_repos_for_org', side_effect=exc): + org = Org("testorg") + self.assertEqual(org.repos, []) + + @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) + def test_socket_timeout_prints_retry(self): + with patch.object(Org, '_get_github_repos_for_org', side_effect=socket.timeout()): + org = Org("testorg") + self.assertEqual(org.repos, []) + + def tearDown(self): + # clean up any csv files created during tests + for name in ['repo1', 'repo2', 'repo3']: + path = f"testorg-{name}.csv" + if os.path.exists(path): + os.remove(path) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_repo.py b/tests/test_repo.py new file mode 100644 index 0000000..2a4b1b5 --- /dev/null +++ b/tests/test_repo.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +# +# Copyright this project and it's contributors +# SPDX-License-Identifier: Apache-2.0 +# +# encoding=utf8 + +import os +import tempfile +import unittest +from unittest.mock import Mock, patch, MagicMock, call + +import git + +from contrib_check.repo import Repo +from contrib_check.commit import Commit + + +def _make_repo_github(url="https://github.com/foo/bar"): + """Construct a GitHub-style Repo with clone_from mocked out.""" + with patch.object(git.Repo, 'clone_from') as mock_clone: + mock_git_repo = Mock() + mock_git_repo.iter_commits.return_value = [] + mock_git_repo.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + mock_clone.return_value = mock_git_repo + repo = Repo(url) + return repo + + +def _make_repo_local(path="."): + """Construct a local Repo with git.Repo mocked out.""" + mock_git_repo = Mock() + mock_git_repo.iter_commits.return_value = [] + mock_git_repo.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + with patch.object(git.Repo, '__init__', return_value=None): + with patch('contrib_check.repo.git.Repo', return_value=mock_git_repo): + repo = Repo(path) + return repo + + +class TestRepoInitGithub(unittest.TestCase): + + def setUp(self): + self.repo = _make_repo_github("https://github.com/foo/bar") + + def tearDown(self): + if os.path.isfile("foo-bar.csv"): + os.remove("foo-bar.csv") + + def test_name(self): + self.assertEqual(self.repo.name, "bar") + + def test_html_url(self): + self.assertEqual(self.repo.html_url, "https://github.com/foo/bar") + + def test_csv_filename(self): + self.assertEqual(self.repo.csv_filename, "foo-bar.csv") + + def test_csv_file_created(self): + self.assertTrue(os.path.isfile("foo-bar.csv")) + + +class TestRepoInitLocal(unittest.TestCase): + + def setUp(self): + self.repo = _make_repo_local(".") + self.expected_name = os.path.basename(os.path.realpath(".")) + + def tearDown(self): + if os.path.isfile(self.expected_name + ".csv"): + os.remove(self.expected_name + ".csv") + + def test_name(self): + self.assertEqual(self.repo.name, self.expected_name) + + def test_html_url_empty(self): + self.assertEqual(self.repo.html_url, "") + + def test_csv_filename(self): + self.assertEqual(self.repo.csv_filename, self.expected_name + ".csv") + + def test_csv_file_created(self): + self.assertTrue(os.path.isfile(self.expected_name + ".csv")) + + +class TestRepoLoadPastSignoffs(unittest.TestCase): + + def setUp(self): + self.repo = _make_repo_github() + + def tearDown(self): + if os.path.isfile("foo-bar.csv"): + os.remove("foo-bar.csv") + + def test_loads_signoffs_from_directory(self): + blob_content = b"some signoff content" + mock_blob = Mock() + + # write content to a real temp file so open() works + with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as tf: + tf.write(blob_content) + tf_name = tf.name + mock_blob.abspath = tf_name + + mock_tree_entry = Mock() + mock_tree_entry.type = 'tree' + mock_tree_entry.name = 'dco-signoffs' + mock_tree_entry.blobs = [mock_blob] + + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree = [mock_tree_entry] + + self.repo.loadPastSignoffs() + self.assertEqual(self.repo.past_signoffs, [blob_content]) + os.unlink(tf_name) + + def test_skips_non_matching_directories(self): + self.repo.past_signoffs = [] # reset instance list; class-level list can leak between tests + mock_tree_entry = Mock() + mock_tree_entry.type = 'tree' + mock_tree_entry.name = 'other-dir' + + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree = [mock_tree_entry] + + self.repo.loadPastSignoffs() + self.assertEqual(self.repo.past_signoffs, []) + + def test_invalid_repo_returns_false(self): + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree.__iter__ = Mock(side_effect=ValueError) + result = self.repo.loadPastSignoffs() + self.assertFalse(result) + + +class TestRepoLoadRemediationCommits(unittest.TestCase): + + def setUp(self): + self.repo = _make_repo_github() + + def tearDown(self): + if os.path.isfile("foo-bar.csv"): + os.remove("foo-bar.csv") + + def test_collects_remediations_from_commits(self): + mock_git_commit = Mock() + mock_git_commit.parents = [1] + + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + self.repo.git_repo_object.iter_commits.return_value = [mock_git_commit] + + with patch.object(Commit, 'is_remediation_commit', return_value=True): + with patch.object(Commit, '__init__', return_value=None) as mock_init: + # can't easily patch __init__ and keep remediations; test via integration + pass + + # integration-style: let a real Commit run (no dco.yml → no remediations) + self.repo.git_repo_object.iter_commits.return_value = [mock_git_commit] + self.repo.remediations = [] + self.repo.load_remediation_commits() + # No dco.yml config → is_remediation_commit returns False → remediations stays empty + self.assertEqual(self.repo.remediations, []) + + +class TestRepoScan(unittest.TestCase): + + def setUp(self): + self.repo = _make_repo_github() + + def tearDown(self): + for f in ["foo-bar.csv"]: + if os.path.isfile(f): + os.remove(f) + import shutil + if os.path.isdir("dco-signoffs"): + shutil.rmtree("dco-signoffs") + + def test_scan_writes_error_for_unsigned_commit(self): + mock_git_commit = Mock() + mock_git_commit.parents = [1] + mock_git_commit.message = "no signoff" + mock_git_commit.hexsha = "aabbccdd" * 5 + mock_git_commit.author.name = "Dev" + mock_git_commit.author.email = "dev@example.com" + mock_git_commit.authored_datetime = "2024-01-01" + + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + self.repo.git_repo_object.iter_commits.return_value = [mock_git_commit] + self.repo.git_repo_object.git.rev_parse.return_value = "aabbccd" + self.repo.past_signoffs = [] + self.repo.remediations = [] + + self.repo.scan() + + # flush the still-open csv writer before reading + self.repo._Repo__csvfileref.flush() + + with open("foo-bar.csv") as f: + content = f.read() + self.assertIn("no signoff", content) + self.assertIn("dco", content) + + def test_scan_no_error_for_signed_commit(self): + mock_git_commit = Mock() + mock_git_commit.parents = [1] + mock_git_commit.message = "fix Signed-off-by: Dev " + + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + self.repo.git_repo_object.iter_commits.return_value = [mock_git_commit] + self.repo.past_signoffs = [] + self.repo.remediations = [] + + self.repo.scan() + + with open("foo-bar.csv") as f: + content = f.read() + self.assertEqual(content.strip(), "") + + def test_scan_no_error_for_merge_commit(self): + mock_git_commit = Mock() + mock_git_commit.parents = [1, 2] + mock_git_commit.message = "Merge branch x into y" + + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) + self.repo.git_repo_object.iter_commits.return_value = [mock_git_commit] + + self.repo.scan() + + with open("foo-bar.csv") as f: + content = f.read() + self.assertEqual(content.strip(), "") + + +class TestRepoWriteDCOPriorCommitsFile(unittest.TestCase): + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.repo = _make_repo_github() + self.repo.prior_commits_dir = os.path.join(self.tmpdir, "dco-signoffs") + self.repo.name = "myrepo" + + def tearDown(self): + import shutil + shutil.rmtree(self.tmpdir, ignore_errors=True) + if os.path.isfile("foo-bar.csv"): + os.remove("foo-bar.csv") + + def _make_commit_obj(self, hexsha, message, author_name, author_email): + commit = Mock() + commit.git_commit_object.hexsha = hexsha + commit.git_commit_object.message = message + commit.git_commit_object.author.name = author_name + commit.git_commit_object.author.email = author_email + return commit + + def test_creates_new_file(self): + commit = self._make_commit_obj("abc123", "fix bug", "Alice", "alice@example.com") + self.repo.write_dco_prior_commits_file(commit) + + expected = os.path.join(self.repo.prior_commits_dir, "myrepo", "Alice-myrepo.txt") + self.assertTrue(os.path.isfile(expected)) + with open(expected) as f: + content = f.read() + self.assertIn("Alice", content) + self.assertIn("abc123", content) + + def test_appends_to_existing_file(self): + commit1 = self._make_commit_obj("abc123", "first", "Alice", "alice@example.com") + commit2 = self._make_commit_obj("def456", "second", "Alice", "alice@example.com") + self.repo.write_dco_prior_commits_file(commit1) + self.repo.write_dco_prior_commits_file(commit2) + + expected = os.path.join(self.repo.prior_commits_dir, "myrepo", "Alice-myrepo.txt") + with open(expected) as f: + content = f.read() + self.assertIn("abc123", content) + self.assertIn("def456", content) + + +class TestRepoWriteIndividualRemediationCommit(unittest.TestCase): + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.repo = _make_repo_github() + self.repo.remediation_commits_dir = os.path.join(self.tmpdir, "remediation-commits") + self.repo.name = "myrepo" + self.repo.git_repo_object = Mock() + self.repo.git_repo_object.git.rev_parse.return_value = "abc1234" + + def tearDown(self): + import shutil + shutil.rmtree(self.tmpdir, ignore_errors=True) + if os.path.isfile("foo-bar.csv"): + os.remove("foo-bar.csv") + + def _make_commit_obj(self, hexsha="fullhash", author_name="Alice", author_email="alice@example.com"): + commit = Mock() + commit.git_commit_object.hexsha = hexsha + commit.git_commit_object.author.name = author_name + commit.git_commit_object.author.email = author_email + return commit + + def test_creates_new_remediation_file(self): + commit = self._make_commit_obj() + self.repo.write_individual_remediation_commit(commit) + + expected = os.path.join(self.repo.remediation_commits_dir, "myrepo-Alice.txt") + self.assertTrue(os.path.isfile(expected)) + with open(expected) as f: + content = f.read() + self.assertIn("Alice", content) + self.assertIn("abc1234", content) + + def test_appends_to_existing_remediation_file(self): + commit1 = self._make_commit_obj(hexsha="hash1") + commit2 = self._make_commit_obj(hexsha="hash2") + self.repo.git_repo_object.git.rev_parse.side_effect = ["short1", "short2"] + self.repo.write_individual_remediation_commit(commit1) + self.repo.write_individual_remediation_commit(commit2) + + expected = os.path.join(self.repo.remediation_commits_dir, "myrepo-Alice.txt") + with open(expected) as f: + content = f.read() + self.assertIn("short1", content) + self.assertIn("short2", content) + + +if __name__ == '__main__': + unittest.main() From c1f2133f1c35cf19b1c9cdf63767b9756c922f81 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:53:03 -0400 Subject: [PATCH 15/20] Fix build error Signed-off-by: John Mertic --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1b31915..e6b456f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ packages = [{ include = "contrib_check" }] contrib-check = "contrib_check.main:main" [tool.poetry.dependencies] -python = "^3.9" +python = "^3.12" PyYAML = ">=6.0.1" GitPython = ">=3.1.50" PyGithub = ">=2.4.0" From 7385d5737f09f6cec917e8cc648432d549df71a6 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:55:14 -0400 Subject: [PATCH 16/20] Fix build error Signed-off-by: John Mertic --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a961fe..8a1921f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,7 +47,7 @@ jobs: - name: Run tests run: | - poetry run coverage run -m pytest tests.py + poetry run coverage run -m pytest tests/ poetry run coverage xml - name: SonarCloud Scan From da4b2156dee011d5c8af5dabffb1e61ee1fdb676 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Sun, 17 May 2026 09:56:54 -0400 Subject: [PATCH 17/20] Re-add alive_progress Signed-off-by: John Mertic --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e6b456f..535d302 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ python = "^3.12" PyYAML = ">=6.0.1" GitPython = ">=3.1.50" PyGithub = ">=2.4.0" +alive-progress = "^3.3.0" [tool.poetry.group.dev.dependencies] coverage = ">=7.14.0" From 57b8492cf4071075b7bb6098118f62941386239c Mon Sep 17 00:00:00 2001 From: John Mertic Date: Mon, 18 May 2026 17:07:10 -0400 Subject: [PATCH 18/20] Fix test coverage Signed-off-by: John Mertic --- contrib_check/commit.py | 12 +- contrib_check/main.py | 6 +- contrib_check/org.py | 20 +++- contrib_check/repo.py | 161 +++++++++++++------------ pyproject.toml | 1 + tests/test_commit.py | 24 ++-- tests/test_gitremoteprogress.py | 52 ++++++++ tests/test_org.py | 205 +++++++++++++++----------------- tests/test_repo.py | 123 ++++++++++++++++++- 9 files changed, 386 insertions(+), 218 deletions(-) create mode 100644 tests/test_gitremoteprogress.py diff --git a/contrib_check/commit.py b/contrib_check/commit.py index 79beedf..fdae645 100644 --- a/contrib_check/commit.py +++ b/contrib_check/commit.py @@ -37,19 +37,19 @@ def __init__(self, git_commit_object, repo_object): self.load_remediation_commit_config() - def checkDCOSignoff(self): - if self.isDCOSignOffRequired(): - return self.hasDCOSignOff() or self.hasDCOPastSignoff() or self.has_remediation() + def check_dco_signoff(self): + if self.is_dco_signoff_required(): + return self.has_dco_signoff() or self.has_dco_past_signoff() or self.has_remediation() return True - def isDCOSignOffRequired(self): + def is_dco_signoff_required(self): return not self.is_merge_commit - def hasDCOSignOff(self): + def has_dco_signoff(self): return (re.search("Signed-off-by: (.+)",self.git_commit_object.message) != None) - def hasDCOPastSignoff(self): + def has_dco_past_signoff(self): for signoff in self.repo_object.past_signoffs: if (re.search(self.git_commit_object.hexsha.encode(),signoff) != None): return True diff --git a/contrib_check/main.py b/contrib_check/main.py index db17222..d9a36e1 100644 --- a/contrib_check/main.py +++ b/contrib_check/main.py @@ -51,7 +51,7 @@ def main(): orgObj.only_repos = config['org']['only_repos'] if 'skip_archived' in config['org']: orgObj.skip_archived = config['org']['skip_archived'] - repos = orgObj.reloadRepos() + repos = orgObj.reload_repos() if args.repo: repos = [Repo(args.repo)] @@ -62,9 +62,9 @@ def main(): if 'dco' in config and 'prior_commits_directory' in config['dco']: repoObj.prior_commits_dir = config['dco']['prior_commits_directory'] if 'dco' in config and 'signoff_dirs' in config['dco']: - repoObj.loadPastSignoffs(config['dco']['signoff_dirs']) + repoObj.load_past_signoffs(config['dco']['signoff_dirs']) else: - repoObj.loadPastSignoffs() + repoObj.load_past_signoffs() repoObj.scan() print("This took " + str(datetime.now() - startTime)) diff --git a/contrib_check/org.py b/contrib_check/org.py index c1cf9ec..83cb2a6 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -23,14 +23,22 @@ class Org(): lazy_load = False repos = [] - def __init__(self, org_name, org_type='github', ignore_repos=[], only_repos=[], skip_archived=True, load_repos=True): + def __init__(self, org_name, org_type='github', ignore_repos=None, only_repos=None, skip_archived=True, load_repos=True): + # Properly bound mutable states to this exact instance + self.ignore_repos = ignore_repos if ignore_repos is not None else [] + self.only_repos = only_repos if only_repos is not None else [] + self.repos = [] + + self.__org_name = '' + self.__org_type = 'github' + self.skip_archived = skip_archived + + # Execute properties assignments to trigger setters validation self.org_type = org_type self.org_name = org_name - self.ignore_repos = ignore_repos - self.only_repos = only_repos - self.skip_archived = skip_archived + if load_repos: - self.reloadRepos() + self.reload_repos() @property def org_name(self): @@ -50,7 +58,7 @@ def org_type(self, org_type): raise Exception('Github token is not defined. Set GITHUB_TOKEN environment variable to a valid Github token') self.__org_type = org_type - def reloadRepos(self): + def reload_repos(self): self.repos = [] if self.org_type == 'github': diff --git a/contrib_check/repo.py b/contrib_check/repo.py index 429e5d2..ca3ae16 100644 --- a/contrib_check/repo.py +++ b/contrib_check/repo.py @@ -7,6 +7,7 @@ # # Thin layer that uses GitPython.Repo.Base but adds some metadata and the past signoffs # + from __future__ import annotations import os @@ -22,50 +23,52 @@ from .commit import Commit class Repo(): - - name = '' - html_url = '' - past_signoffs = [] - remediations = [] - git_repo_object = None + # Class-level immutable defaults (Safe) prior_commits_dir = 'dco-signoffs' remediation_commits_dir = 'remediation-commits' - checks = { - 'dco': True - } - - error_types = { - 'dco': 'The commit did not have a DCO Signoff' - } - - __csv_filename = 'output.csv' - __csv_writer = None - __fo = None - __csvfileref = None + checks = { 'dco': True } + error_types = { 'dco': 'The commit did not have a DCO Signoff' } def __init__(self, repo_path): + # Properly scope mutable collections to the INSTANCE + self.name = '' + self.html_url = '' + self.past_signoffs = [] + self.remediations = [] + self.git_repo_object = None + + self.__csv_filename = 'output.csv' + self.__csv_writer = None + self.__fo = None + self.__csvfileref = None # Skip LFS files - we don't need to download them - os.environ["GIT_LFS_SKIP_SMUDGE"]="1" + os.environ["GIT_LFS_SKIP_SMUDGE"] = "1" # if GitHub, we can find what we need - url_search = re.search("https://github.com/(.*)/(.*)",repo_path) + url_search = re.search(r"https://github\.com/(.*)/(.*)", repo_path) if url_search: self.html_url = repo_path self.name = url_search.group(2) self.__fo = tempfile.TemporaryDirectory() - print("Cloning repo %s" %self.html_url) - self.git_repo_object = git.Repo.clone_from(self.html_url,self.__fo.name,progress=GitRemoteProgress()) - self.csv_filename = url_search.group(1)+'-'+self.name+'.csv' + print(f"Cloning repo {self.html_url}") + self.git_repo_object = git.Repo.clone_from( + self.html_url, self.__fo.name, progress=GitRemoteProgress() + ) + self.csv_filename = f"{url_search.group(1)}-{self.name}.csv" # local clone elif os.path.isdir(repo_path): self.name = os.path.basename(os.path.realpath(repo_path)) self.git_repo_object = git.Repo(repo_path) - self.csv_filename = self.name+'.csv' + self.csv_filename = f"{self.name}.csv" + self.load_remediation_commits() - def loadPastSignoffs(self, dco_signoffs_directories = ["dco-signoffs"]): + def load_past_signoffs(self, dco_signoffs_directories=None): + if dco_signoffs_directories is None: + dco_signoffs_directories = ["dco-signoffs"] + try: for entry in self.git_repo_object.head.commit.tree: if entry.type == 'tree' and entry.name in dco_signoffs_directories: @@ -73,20 +76,25 @@ def loadPastSignoffs(self, dco_signoffs_directories = ["dco-signoffs"]): with open(blob.abspath, 'rb') as content_file: content = content_file.read() self.past_signoffs.append(content) - except ValueError: + except (ValueError, AttributeError): print("...invalid or empty repo - skipping") return False + return True def load_remediation_commits(self): + if not self.git_repo_object: + return for commit in self.git_repo_object.iter_commits(): commit_obj = Commit(commit, self) if commit_obj.is_remediation_commit(): self.remediations.extend(commit_obj.remediations) def scan(self): + if not self.git_repo_object: + return for commit in self.git_repo_object.iter_commits(): commit_obj = Commit(commit, self) - if 'dco' in self.checks and not commit_obj.checkDCOSignoff(): + if 'dco' in self.checks and not commit_obj.check_dco_signoff(): self.write_error(commit_obj, 'dco') @property @@ -95,21 +103,37 @@ def csv_filename(self): @csv_filename.setter def csv_filename(self, csvfile): - # remove file if there already + # Safely clear out any old references first + if self.__csvfileref: + self.__csvfileref.close() + if os.path.isfile(csvfile): os.remove(csvfile) - self.__csvfileref = open(csvfile, mode='w') - self.__csv_writer = csv.writer(self.__csvfileref, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) + # We keep this reference open because write_error needs continuous access + self.__csvfileref = open(csvfile, mode='w', encoding='utf-8', newline='') + self.__csv_writer = csv.writer( + self.__csvfileref, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL + ) self.__csv_filename = csvfile - def __del__(self): - if self.__fo: - self.__fo.cleanup() + def close(self): + """Explicit cleanup method to ensure resources drain properly.""" if self.__csvfileref: self.__csvfileref.close() + self.__csvfileref = None + if self.__fo: + self.__fo.cleanup() + self.__fo = None + + def __del__(self): + # Fallback safety net + self.close() def write_error(self, commit, error_type): + if not self.__csv_writer: + raise RuntimeError("CSV file has not been initialized via csv_filename setter.") + self.__csv_writer.writerow([ self.name, commit.git_commit_object.hexsha, @@ -125,51 +149,37 @@ def write_error(self, commit, error_type): self.write_dco_prior_commits_file(commit) def write_individual_remediation_commit(self, commit): - if not os.path.exists(self.remediation_commits_dir): - os.mkdir(self.remediation_commits_dir) + os.makedirs(self.remediation_commits_dir, exist_ok=True) - remediationfilename = "{}/{}-{}.txt".format(self.remediation_commits_dir, self.name, commit.git_commit_object.author.name) + remediationfilename = os.path.join( + self.remediation_commits_dir, f"{self.name}-{commit.git_commit_object.author.name}.txt" + ) short_hash = self.git_repo_object.git.rev_parse(commit.git_commit_object.hexsha, short="7") - if not os.path.isfile(remediationfilename): - fh = open(remediationfilename, mode='w+') - fh.write("DCO Remediation Commit for {} <{}>\n\n".format(commit.git_commit_object.author.name, commit.git_commit_object.author.email)) - else: - fh = open(remediationfilename, mode='a') + mode = 'a' if os.path.isfile(remediationfilename) else 'w+' - fh.write("I, {} <{}>, hereby add my Signed-off-by to this commit: {}\n".format(commit.git_commit_object.author.name, commit.git_commit_object.author.email, short_hash)) - fh.close() + with open(remediationfilename, mode=mode, encoding='utf-8') as fh: + if mode == 'w+': + fh.write(f"DCO Remediation Commit for {commit.git_commit_object.author.name} <{commit.git_commit_object.author.email}>\n\n") + fh.write(f"I, {commit.git_commit_object.author.name} <{commit.git_commit_object.author.email}>, hereby add my Signed-off-by to this commit: {short_hash}\n") def write_dco_prior_commits_file(self, commit): - if not os.path.exists(self.prior_commits_dir): - os.mkdir(self.prior_commits_dir) - - if not os.path.exists(self.prior_commits_dir+'/'+self.name): - os.mkdir(self.prior_commits_dir+'/'+self.name) - - commitfilename = self.prior_commits_dir+'/'+self.name+'/'+commit.git_commit_object.author.name+'-'+self.name+'.txt' + target_dir = os.path.join(self.prior_commits_dir, self.name) + os.makedirs(target_dir, exist_ok=True) - if not os.path.isfile(commitfilename): - fh = open(commitfilename, mode='w+') - fh.write("I, "+commit.git_commit_object.author.name+" hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: "+commit.git_commit_object.author.email+"\n\n") - else: - fh = open(commitfilename, mode='a') + commitfilename = os.path.join(target_dir, f"{commit.git_commit_object.author.name}-{self.name}.txt") + mode = 'a' if os.path.isfile(commitfilename) else 'w+' - fh.write(commit.git_commit_object.hexsha+" "+commit.git_commit_object.message+"\n") - fh.close() + with open(commitfilename, mode=mode, encoding='utf-8') as fh: + if mode == 'w+': + fh.write(f"I, {commit.git_commit_object.author.name} hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: {commit.git_commit_object.author.email}\n\n") + fh.write(f"{commit.git_commit_object.hexsha} {commit.git_commit_object.message}\n") class GitRemoteProgress(git.RemoteProgress): OP_CODES = [ - "BEGIN", - "CHECKING_OUT", - "COMPRESSING", - "COUNTING", - "END", - "FINDING_SOURCES", - "RECEIVING", - "RESOLVING", - "WRITING", + "BEGIN", "CHECKING_OUT", "COMPRESSING", "COUNTING", "END", + "FINDING_SOURCES", "RECEIVING", "RESOLVING", "WRITING" ] OP_CODE_MAP = { getattr(git.RemoteProgress, _op_code): _op_code for _op_code in OP_CODES @@ -178,11 +188,10 @@ class GitRemoteProgress(git.RemoteProgress): def __init__(self) -> None: super().__init__() self.alive_bar_instance = None + self.bar = None @classmethod def get_curr_op(cls, op_code: int) -> str: - """Get OP name from OP code.""" - # Remove BEGIN- and END-flag and get op name op_code_masked = op_code & cls.OP_MASK return cls.OP_CODE_MAP.get(op_code_masked, "?").title() @@ -194,25 +203,25 @@ def update( message: str | None = "", ) -> None: cur_count = float(cur_count) - max_count = float(max_count) + max_count = float(max_count) if max_count is not None else 100.0 - # Start new bar on each BEGIN-flag if op_code & self.BEGIN: self.curr_op = self.get_curr_op(op_code) self._dispatch_bar(title=self.curr_op) - self.bar(cur_count / max_count) - self.bar.text(message) + if self.bar: + self.bar(cur_count / max_count) + self.bar.text(str(message or "")) - # End progress monitoring on each END-flag if op_code & git.RemoteProgress.END: self._destroy_bar() def _dispatch_bar(self, title: str | None = "") -> None: - """Create a new progress bar""" self.alive_bar_instance = alive_bar(manual=True, title=title) self.bar = self.alive_bar_instance.__enter__() def _destroy_bar(self) -> None: - """Destroy an existing progress bar""" - self.alive_bar_instance.__exit__(None, None, None) + if self.alive_bar_instance: + self.alive_bar_instance.__exit__(None, None, None) + self.alive_bar_instance = None + self.bar = None diff --git a/pyproject.toml b/pyproject.toml index 535d302..d73037e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ alive-progress = "^3.3.0" [tool.poetry.group.dev.dependencies] coverage = ">=7.14.0" pytest = ">=8.0.0" +pytest-cov = "^7.1.0" [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/tests/test_commit.py b/tests/test_commit.py index 31c9953..7033b22 100644 --- a/tests/test_commit.py +++ b/tests/test_commit.py @@ -34,12 +34,12 @@ class TestCommitDCOSignOff(unittest.TestCase): def test_has_no_dco_signoff(self): commit = _make_commit() commit.git_commit_object.message = "has no signoff" - self.assertFalse(commit.hasDCOSignOff()) + self.assertFalse(commit.has_dco_signoff()) def test_has_dco_signoff(self): commit = _make_commit() commit.git_commit_object.message = "fix: thing Signed-off-by: John Mertic " - self.assertTrue(commit.hasDCOSignOff()) + self.assertTrue(commit.has_dco_signoff()) class TestCommitDCOPastSignoff(unittest.TestCase): @@ -57,30 +57,30 @@ def test_found_past_signoff(self): commit = _make_commit() commit.git_commit_object.hexsha = self.SHA commit.repo_object.past_signoffs = [self.SIGNOFF_BLOB] - self.assertTrue(commit.hasDCOPastSignoff()) + self.assertTrue(commit.has_dco_past_signoff()) def test_no_past_signoff(self): commit = _make_commit() commit.git_commit_object.hexsha = self.OTHER_SHA commit.repo_object.past_signoffs = [self.SIGNOFF_BLOB] - self.assertFalse(commit.hasDCOPastSignoff()) + self.assertFalse(commit.has_dco_past_signoff()) def test_empty_past_signoffs(self): commit = _make_commit() commit.git_commit_object.hexsha = self.SHA commit.repo_object.past_signoffs = [] - self.assertFalse(commit.hasDCOPastSignoff()) + self.assertFalse(commit.has_dco_past_signoff()) class TestCommitDCOSignOffRequired(unittest.TestCase): def test_not_required_for_merge_commit(self): commit = _make_commit(parents=[1, 2, 3]) - self.assertFalse(commit.isDCOSignOffRequired()) + self.assertFalse(commit.is_dco_signoff_required()) def test_required_for_normal_commit(self): commit = _make_commit(parents=[1]) - self.assertTrue(commit.isDCOSignOffRequired()) + self.assertTrue(commit.is_dco_signoff_required()) class TestCommitCheckDCOSignoff(unittest.TestCase): @@ -96,12 +96,12 @@ class TestCommitCheckDCOSignoff(unittest.TestCase): def test_merge_commit_always_passes(self): commit = _make_commit(parents=[1, 2]) commit.git_commit_object.message = "no signoff here" - self.assertTrue(commit.checkDCOSignoff()) + self.assertTrue(commit.check_dco_signoff()) def test_normal_commit_with_signoff_passes(self): commit = _make_commit(parents=[1]) commit.git_commit_object.message = "fix: thing Signed-off-by: Jane " - self.assertTrue(commit.checkDCOSignoff()) + self.assertTrue(commit.check_dco_signoff()) def test_normal_commit_without_signoff_fails(self): commit = _make_commit(parents=[1]) @@ -109,14 +109,14 @@ def test_normal_commit_without_signoff_fails(self): commit.repo_object.past_signoffs = [] commit.repo_object.git_repo_object.git.rev_parse.return_value = "abc1234" commit.remediations = [] - self.assertFalse(commit.checkDCOSignoff()) + self.assertFalse(commit.check_dco_signoff()) def test_normal_commit_with_past_signoff_passes(self): commit = _make_commit(parents=[1]) commit.git_commit_object.message = "no signoff" commit.git_commit_object.hexsha = self.SHA commit.repo_object.past_signoffs = [self.SIGNOFF_BLOB] - self.assertTrue(commit.checkDCOSignoff()) + self.assertTrue(commit.check_dco_signoff()) def test_normal_commit_with_remediation_passes(self): commit = _make_commit(parents=[1]) @@ -125,7 +125,7 @@ def test_normal_commit_with_remediation_passes(self): short = "abc1234" commit.repo_object.git_repo_object.git.rev_parse.return_value = short commit.remediations = [short] - self.assertTrue(commit.checkDCOSignoff()) + self.assertTrue(commit.check_dco_signoff()) class TestCommitHasRemediation(unittest.TestCase): diff --git a/tests/test_gitremoteprogress.py b/tests/test_gitremoteprogress.py new file mode 100644 index 0000000..0ecb056 --- /dev/null +++ b/tests/test_gitremoteprogress.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# +# Copyright this project and it's contributors +# SPDX-License-Identifier: Apache-2.0 +# +# encoding=utf8 + +import unittest +from unittest.mock import MagicMock, patch +import git +from contrib_check.repo import GitRemoteProgress + +class TestGitRemoteProgressCoverage(unittest.TestCase): + + def test_progress_update_with_missing_max_count(self): + """Fixes max_count fallback assignment path: passes None explicitly.""" + progress = GitRemoteProgress() + # Mock out internal bar dependencies so it doesn't crash on print updates + progress.bar = MagicMock() + + # Pass None explicitly as max_count + progress.update(op_code=2, cur_count=50, max_count=None, message="Fetching") + + # Verify the calculation fell back to 100.0 safely (50 / 100.0 = 0.5) + progress.bar.assert_called_once_with(0.5) + + @patch('contrib_check.repo.alive_bar') + def test_progress_lifecycle_and_missing_bar_guard(self, mock_alive_bar): + """Tests the full BEGIN/END cycle and explicitly covers the missing bar guard path.""" + progress = GitRemoteProgress() + + # Scenario A: Update is called WITHOUT a BEGIN flag while progress.bar is uninitialized. + # This forces 'if self.bar:' to evaluate to False, clearing that branch gap. + progress.update(op_code=0, cur_count=10, max_count=100, message="Quiet update") + self.assertIsNone(progress.bar) + + # Scenario B: Fire a proper BEGIN sequence to check bar initialization setup. + # git.RemoteProgress.BEGIN is a bitmask flag equal to 16 + progress.update(op_code=git.RemoteProgress.BEGIN, cur_count=0, max_count=100, message="Starting") + self.assertIsNotNone(progress.bar) + + # Scenario C: Fire an END sequence to test structural context exit tracking. + # git.RemoteProgress.END is a bitmask flag equal to 32 + progress.update(op_code=git.RemoteProgress.END, cur_count=100, max_count=100, message="Done") + self.assertIsNone(progress.bar) + self.assertIsNone(progress.alive_bar_instance) + + def test_get_curr_op_unknown_code(self): + """Verifies the fallback title parsing behavior when an unexpected op code hits.""" + # Mix an out-of-bounds op_code value + op_title = GitRemoteProgress.get_curr_op(9999) + self.assertEqual(op_title, "?") diff --git a/tests/test_org.py b/tests/test_org.py index baae648..784d0e8 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -4,131 +4,116 @@ # SPDX-License-Identifier: Apache-2.0 # # encoding=utf8 +# import os import unittest -from unittest.mock import Mock, patch, MagicMock -import socket - -import git -from github import GithubException, RateLimitExceededException - +from unittest.mock import MagicMock, patch +from github import RateLimitExceededException, GithubException from contrib_check.org import Org +class TestOrgCoverage(unittest.TestCase): -def _make_gh_repo(name, html_url=None, archived=False): - r = Mock() - r.name = name - r.html_url = html_url or f"https://github.com/testorg/{name}" - r.archived = archived - return r - + def setUp(self): + # Seed the token environment variable required by the setter validation + os.environ['GITHUB_TOKEN'] = 'fake_secure_token' -GH_REPOS = [ - _make_gh_repo("repo1"), - _make_gh_repo("repo2"), - _make_gh_repo("repo3", archived=True), -] + def tearDown(self): + # Clear it out so it doesn't pollute real execution states + if 'GITHUB_TOKEN' in os.environ: + del os.environ['GITHUB_TOKEN'] + + def test_init_missing_token_exception(self): + """Verifies an explicit error is raised if GITHUB_TOKEN is absent.""" + del os.environ['GITHUB_TOKEN'] + with self.assertRaises(Exception) as context: + Org("my-org", org_type="github") + self.assertIn("Github token is not defined", str(context.exception)) + + @patch('contrib_check.org.Org._get_github_repos_for_org') + def test_reload_repos_skip_non_github_type(self, mock_get_repos): + """Fixes 56 ↛ 78 in original: Skips the github logic block entirely when type differs.""" + org = Org("my-org", load_repos=False) + org._Org__org_type = 'custom_git' + + result = org.reload_repos() + self.assertEqual(result, []) + mock_get_repos.assert_not_called() + + @patch('contrib_check.org.Repo') + @patch('contrib_check.org.Org._get_github_repos_for_org') + def test_reload_repos_filters_and_loops(self, mock_get_repos, mock_repo_class): + """Exercises every loop condition inside reload_repos (ignore, only, and archived filters).""" + mock_repo_1 = MagicMock() + mock_repo_1.name = "ignored-project" + mock_repo_1.archived = False + + mock_repo_2 = MagicMock() + mock_repo_2.name = "skipped-project" + mock_repo_2.archived = False + + mock_repo_3 = MagicMock() + mock_repo_3.name = "archived-project" + mock_repo_3.archived = True + + mock_repo_4 = MagicMock() + mock_repo_4.name = "valid-project" + mock_repo_4.archived = False + mock_repo_4.html_url = "https://github.com/my-org/valid-project" + + mock_get_repos.return_value = [mock_repo_1, mock_repo_2, mock_repo_3, mock_repo_4] + + org = Org( + org_name="my-org", + ignore_repos=["ignored-project"], + only_repos=["valid-project"], + skip_archived=True + ) + self.assertEqual(len(org.repos), 1) + mock_repo_class.assert_called_once_with("https://github.com/my-org/valid-project") -class TestOrgInit(unittest.TestCase): + @patch('contrib_check.org.Repo') + @patch('contrib_check.org.Org._get_github_repos_for_org') + def test_reload_repos_include_archived_when_disabled(self, mock_get_repos, mock_repo_class): + """Fixes 72 ↛ 73: Verifies archived repos are NOT skipped if skip_archived=False.""" + mock_archived_repo = MagicMock() + mock_archived_repo.name = "old-archived-project" + mock_archived_repo.archived = True + mock_archived_repo.html_url = "https://github.com/my-org/old-archived-project" - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_init_no_load_repos(self): - org = Org("testorg", load_repos=False) - self.assertEqual(org.repos, []) + mock_get_repos.return_value = [mock_archived_repo] - def test_init_github_no_token_raises(self): - env = {k: v for k, v in os.environ.items() if k != 'GITHUB_TOKEN'} - with patch.dict(os.environ, env, clear=True): - with self.assertRaisesRegex(Exception, 'Github token'): - Org("testorg") - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_org_name_strips_github_url(self): - org = Org("https://github.com/myorg", load_repos=False) - self.assertEqual(org.org_name, "myorg") - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_org_name_strips_www_github_url(self): - org = Org("https://www.github.com/myorg", load_repos=False) - self.assertEqual(org.org_name, "myorg") - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_org_name_plain_string_unchanged(self): - org = Org("myorg", load_repos=False) - self.assertEqual(org.org_name, "myorg") - - -class TestOrgReloadRepos(unittest.TestCase): - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - @patch.object(git.Repo, 'clone_from') - def test_load_repos_excludes_archived_by_default(self, _clone): - with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): - org = Org("testorg") - self.assertEqual(len(org.repos), 2) - self.assertEqual(org.repos[0].name, "repo1") - self.assertEqual(org.repos[1].name, "repo2") - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - @patch.object(git.Repo, 'clone_from') - def test_load_repos_includes_archived_when_flag_off(self, _clone): - with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): - org = Org("testorg", skip_archived=False) - self.assertEqual(len(org.repos), 3) - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - @patch.object(git.Repo, 'clone_from') - def test_ignore_repos(self, _clone): - with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): - org = Org("testorg", ignore_repos=['repo1']) - self.assertEqual(len(org.repos), 1) - self.assertEqual(org.repos[0].name, "repo2") + # Explicitly pass skip_archived=False to step past line 72 + org = Org(org_name="my-org", skip_archived=False) - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - @patch.object(git.Repo, 'clone_from') - def test_only_repos(self, _clone): - with patch.object(Org, '_get_github_repos_for_org', return_value=GH_REPOS): - org = Org("testorg", only_repos=['repo1']) + # The archived repo should bypass the filter and be added cleanly self.assertEqual(len(org.repos), 1) - self.assertEqual(org.repos[0].name, "repo1") - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - @patch('contrib_check.org.time.sleep') - def test_rate_limit_exception_prints_message(self, mock_sleep): - with patch.object(Org, '_get_github_repos_for_org', side_effect=RateLimitExceededException(403, "rate limit", {})): - # should not raise; prints a message and returns empty repos - org = Org("testorg") - self.assertEqual(org.repos, []) - - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_github_exception_502_prints_retry(self): - exc = GithubException(502, "bad gateway", {}) - with patch.object(Org, '_get_github_repos_for_org', side_effect=exc): - org = Org("testorg") + mock_repo_class.assert_called_once_with("https://github.com/my-org/old-archived-project") + + @patch('contrib_check.org.Org._get_github_repos_for_org') + def test_reload_repos_rate_limiting_exception(self, mock_get_repos): + """Triggers the API rate limit handler block.""" + mock_get_repos.side_effect = RateLimitExceededException(status=403, data="Rate limit hit", headers={}) + + with patch('time.sleep') as mock_sleep: + org = Org("my-org") + mock_sleep.assert_called_once_with(60) + self.assertEqual(org.repos, []) + + @patch('contrib_check.org.Org._get_github_repos_for_org') + def test_reload_repos_server_error_502_exception(self, mock_get_repos): + """Triggers the 502 branch of the GithubException block.""" + mock_get_repos.side_effect = GithubException(status=502, data={"message": "Bad Gateway"}, headers={}) + org = Org("my-org") self.assertEqual(org.repos, []) - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_github_exception_other_prints_data(self): - exc = GithubException(404, "not found", {}) - with patch.object(Org, '_get_github_repos_for_org', side_effect=exc): - org = Org("testorg") - self.assertEqual(org.repos, []) + @patch('contrib_check.org.Org._get_github_repos_for_org') + def test_reload_repos_other_github_exception_else_branch(self, mock_get_repos): + """Fixes 79 ↛ 82: Triggers the non-502 'else' block inside GithubException.""" + # Use a 404 code to fail the 'if e.status == 502' condition + mock_get_repos.side_effect = GithubException(status=404, data={"message": "Not Found"}, headers={}) - @patch.dict(os.environ, {'GITHUB_TOKEN': 'test123'}) - def test_socket_timeout_prints_retry(self): - with patch.object(Org, '_get_github_repos_for_org', side_effect=socket.timeout()): - org = Org("testorg") + org = Org("my-org") self.assertEqual(org.repos, []) - def tearDown(self): - # clean up any csv files created during tests - for name in ['repo1', 'repo2', 'repo3']: - path = f"testorg-{name}.csv" - if os.path.exists(path): - os.remove(path) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_repo.py b/tests/test_repo.py index 2a4b1b5..e32bc21 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -9,6 +9,7 @@ import tempfile import unittest from unittest.mock import Mock, patch, MagicMock, call +import shutil import git @@ -110,7 +111,7 @@ def test_loads_signoffs_from_directory(self): self.repo.git_repo_object = Mock() self.repo.git_repo_object.head.commit.tree = [mock_tree_entry] - self.repo.loadPastSignoffs() + self.repo.load_past_signoffs() self.assertEqual(self.repo.past_signoffs, [blob_content]) os.unlink(tf_name) @@ -123,13 +124,13 @@ def test_skips_non_matching_directories(self): self.repo.git_repo_object = Mock() self.repo.git_repo_object.head.commit.tree = [mock_tree_entry] - self.repo.loadPastSignoffs() + self.repo.load_past_signoffs() self.assertEqual(self.repo.past_signoffs, []) def test_invalid_repo_returns_false(self): self.repo.git_repo_object = Mock() self.repo.git_repo_object.head.commit.tree.__iter__ = Mock(side_effect=ValueError) - result = self.repo.loadPastSignoffs() + result = self.repo.load_past_signoffs() self.assertFalse(result) @@ -328,6 +329,118 @@ def test_appends_to_existing_remediation_file(self): self.assertIn("short1", content) self.assertIn("short2", content) +class TestRepoBranchCoverage(unittest.TestCase): -if __name__ == '__main__': - unittest.main() + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + # Build mock commit layout + self.mock_commit_obj = MagicMock() + self.mock_commit_obj.git_commit_object.hexsha = "abcdef1234567890" + self.mock_commit_obj.git_commit_object.message = "Fixing a bad bug" + self.mock_commit_obj.git_commit_object.author.name = "John Mertic" + self.mock_commit_obj.git_commit_object.author.email = "john@example.com" + + def tearDown(self): + shutil.rmtree(self.test_dir, ignore_errors=True) + shutil.rmtree('dco-signoffs', ignore_errors=True) + shutil.rmtree('remediation-commits', ignore_errors=True) + + @patch('git.Repo') + def test_init_local_repo_branch(self, mock_git_repo): + """Covers line 69.""" + repo = Repo(self.test_dir) + self.assertEqual(repo.name, os.path.basename(self.test_dir)) + mock_git_repo.assert_called_once_with(self.test_dir) + + @patch('git.Repo') + def test_init_invalid_path_branch(self, mock_git_repo): + """Fixes 69 ↛ 74: Path is neither a GitHub URL nor a valid directory.""" + fake_path = os.path.join(self.test_dir, "does_not_exist") + repo = Repo(fake_path) + self.assertEqual(repo.name, '') + self.assertIsNone(repo.git_repo_object) + + @patch('git.Repo') + def test_load_past_signoffs_defaults(self, mock_git_repo): + """Fixes 77 ↛ 80: Forces dco_signoffs_directories to be None.""" + repo = Repo(self.test_dir) + repo.git_repo_object.head.commit.tree = [] + # Call without arguments to hit the 'is None' branch + result = repo.load_past_signoffs() + self.assertTrue(result) + + @patch('git.Repo') + def test_guard_clauses_empty_repo(self, mock_git_repo): + """Fixes 93 ↛ 94 and 101 ↛ 102: Missing git_repo_object guard routes.""" + fake_path = os.path.join(self.test_dir, "does_not_exist") + repo = Repo(fake_path) # self.git_repo_object is None + + # Should return early gracefully instead of dropping into exceptions + self.assertIsNone(repo.load_remediation_commits()) + self.assertIsNone(repo.scan()) + + @patch('git.Repo') + def test_close_without_csv_ref(self, mock_git_repo): + """Fixes 130 ↛ 133: close() called when __csvfileref is already None.""" + repo = Repo(self.test_dir) + repo.close() # First call closes it + repo.close() # Second call forces the 'if self.__csvfileref' to be False + + @patch('git.Repo') + def test_write_error_uninitialized_csv(self, mock_git_repo): + """Fixes 142 ↛ 143: Verifies RuntimeError when writing without a file setup.""" + repo = Repo(self.test_dir) + # Manually destroy the internal writer object + repo._Repo__csv_writer = None + + with self.assertRaises(RuntimeError): + repo.write_error(self.mock_commit_obj, 'dco') + + @patch('git.Repo') + @patch('contrib_check.repo.Commit') + def test_write_error_non_dco_type(self, mock_commit_class, mock_git_repo): + """Fixes 156 ↛ exit: Skips the write_dco_prior_commits_file branch.""" + repo = Repo(self.test_dir) + output_target = os.path.join(self.test_dir, "scan_output.csv") + repo.csv_filename = output_target + + # Inject custom error type that isn't 'dco' + repo.error_types['custom'] = 'Some alternative issue' + + with patch.object(repo, 'write_dco_prior_commits_file') as mock_write_priors: + repo.write_error(self.mock_commit_obj, 'custom') + # Verify the DCO-specific file write block was bypassed + mock_write_priors.assert_not_called() + + @patch('git.Repo') + @patch('contrib_check.repo.Commit') + def test_load_remediation_commits_true_branch(self, mock_commit_class, mock_git_repo): + mock_commit_instance = mock_commit_class.return_value + mock_commit_instance.is_remediation_commit.return_value = True + mock_commit_instance.remediations = ["remediation_alpha"] + mock_git_repo.return_value.iter_commits.return_value = [MagicMock()] + + repo = Repo(self.test_dir) + repo.load_remediation_commits() + self.assertIn("remediation_alpha", repo.remediations) + + @patch('git.Repo') + def test_csv_filename_removes_existing_file(self, mock_git_repo): + repo = Repo(self.test_dir) + test_csv_path = os.path.join(self.test_dir, "clashing_output.csv") + with open(test_csv_path, 'w') as f: + f.write("old data") + + repo.csv_filename = test_csv_path + self.assertEqual(os.path.getsize(test_csv_path), 0) + + @patch('git.Repo') + def test_load_past_signoffs_with_explicit_directories(self, mock_git_repo): + """Fixes 69 ↛ 72: Forces dco_signoffs_directories to be a provided list instead of None.""" + repo = Repo(self.test_dir) + repo.git_repo_object.head.commit.tree = [] + + # Pass an explicit custom list to evaluate the False/skip branch condition + result = repo.load_past_signoffs(dco_signoffs_directories=["custom-signoffs-dir"]) + self.assertTrue(result) From e9c67b4d525002638a1a5249f0864314618f2fb5 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Mon, 18 May 2026 17:13:35 -0400 Subject: [PATCH 19/20] Actually write out remediation commits Signed-off-by: John Mertic --- .gitignore | 1 + contrib_check/repo.py | 1 + 2 files changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 36a54ba..e75881b 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ Temporary Items /*.yml /*.csv /dco-signoffs/ +/remediation-commits/ diff --git a/contrib_check/repo.py b/contrib_check/repo.py index ca3ae16..ed5982a 100644 --- a/contrib_check/repo.py +++ b/contrib_check/repo.py @@ -147,6 +147,7 @@ def write_error(self, commit, error_type): if error_type == 'dco': self.write_dco_prior_commits_file(commit) + self.write_individual_remediation_commit(commit) def write_individual_remediation_commit(self, commit): os.makedirs(self.remediation_commits_dir, exist_ok=True) From de2ea127230374608eba6320a09e474f7f6ad9c1 Mon Sep 17 00:00:00 2001 From: John Mertic Date: Tue, 19 May 2026 09:04:36 -0400 Subject: [PATCH 20/20] Add logging and fix regexs for finding commits Signed-off-by: John Mertic --- .gitignore | 1 + contrib_check/commit.py | 13 +++++-- contrib_check/main.py | 41 ++++++++++++++++---- contrib_check/org.py | 67 +++++++++++++++++++-------------- contrib_check/repo.py | 5 ++- pyproject.toml | 3 ++ tests/conftest.py | 20 ++++++++++ tests/test_commit.py | 1 - tests/test_gitremoteprogress.py | 2 + tests/test_org.py | 2 + tests/test_repo.py | 41 +++++++++++--------- 11 files changed, 137 insertions(+), 59 deletions(-) create mode 100644 tests/conftest.py diff --git a/.gitignore b/.gitignore index e75881b..66ec55c 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,4 @@ Temporary Items /*.csv /dco-signoffs/ /remediation-commits/ +/*.log diff --git a/contrib_check/commit.py b/contrib_check/commit.py index fdae645..6aa8b16 100644 --- a/contrib_check/commit.py +++ b/contrib_check/commit.py @@ -8,6 +8,7 @@ # Provides a decorator-esque pattern on the GitPython.Commit object to add the commit inspection capabilities import re +import logging # third party modules import yaml @@ -22,8 +23,8 @@ class Commit(): create_prior_commits_dir = 'dco-signoffs' - remediation_regex_individual = "^I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)$" - remediation_regex_thirdparty = "^On behalf of (.*) <(.*)>, I, (.*) <(.*)>, hereby add my Signed-off-by to this commit: (.*)$" + remediation_regex_individual = r"I,\s+(.*?)\s+<(.*?)>,\s+hereby\s+add\s+my\s+Signed-off-by\s+to\s+this\s+commit:\s+([a-f0-9]+)" + remediation_regex_thirdparty = r"On\s+behalf\s+of\s+(.*?)\s+<(.*?)>,\s+I,\s+(.*?)\s+<(.*?)>,\s+hereby\s+add\s+my\s+Signed-off-by\s+to\s+this\s+commit:\s+([a-f0-9]+)" allow_remediation_commit_individual = False allow_remediation_commit_thirdparty = False @@ -74,15 +75,19 @@ def is_remediation_commit(self): is_remediation_commit = False if self.allow_remediation_commit_individual: - for match in re.findall(self.remediation_regex_individual,self.git_commit_object.message,flags=re.I|re.M): + logging.getLogger().debug(f"Looking for individual remediation commits for commit {self.git_commit_object.hexsha}") + for match in re.findall(self.remediation_regex_individual,self.git_commit_object.message,flags=re.I|re.M|re.DOTALL): # ensure it's a valid remediation commit by matching the author with the attestation if ( match[0] == self.git_commit_object.author.name ) and ( match[1] == self.git_commit_object.author.email ): + logging.getLogger().debug(f"Found individual remediation commit {match[2]} in commit {self.git_commit_object.hexsha}") self.remediations.append(match[2]) is_remediation_commit = True if self.allow_remediation_commit_thirdparty: - for match in re.findall(self.remediation_regex_thirdparty,self.git_commit_object.message,flags=re.I|re.M): + logging.getLogger().debug(f"Looking for third party remediation commits for commit {self.git_commit_object.hexsha}") + for match in re.findall(self.remediation_regex_thirdparty,self.git_commit_object.message,flags=re.I|re.M|re.DOTALL): # ensure it's a valid remediation commit by matching the author with the attestation if ( match[2] == self.git_commit_object.author.name ) and ( match[3] == self.git_commit_object.author.email ): + logging.getLogger().debug(f"Found third party remediation commit {match[4]} in commit {self.git_commit_object.hexsha}") self.remediations.append(match[4]) is_remediation_commit = True diff --git a/contrib_check/main.py b/contrib_check/main.py index d9a36e1..65b44e4 100644 --- a/contrib_check/main.py +++ b/contrib_check/main.py @@ -8,13 +8,14 @@ import shutil from argparse import ArgumentParser, FileType from datetime import datetime +import logging +import sys # third party modules import yaml from contrib_check.repo import Repo from contrib_check.org import Org - def main(): startTime = datetime.now() @@ -24,9 +25,39 @@ def main(): group.add_argument("--repo", dest="repo", help="URL or path to the repo to search") group.add_argument("--org", dest="org", help="URL to GitHub org to search") parser.add_argument("--dco", dest="dco", help="Perform a DCO check (defaults to true)", default=True) + parser.add_argument("--dco-allow-individual-remediation-commits", + dest="dco-allow-individual-remediation-commits", + help="Allow individual remediation commits for DCO signoffs (only needed if not enabled in dco.yml in the repo)", + default=False) + parser.add_argument("--dco-allow-thirdparty-remediation-commits", + dest="dco-allow-thirdparty-remediation-commits", + help="Allow third party remediation commits for DCO signoffs (only needed if not enabled in dco.yml in the repo)", + default=False) + parser.add_argument("--dco-allow-dcosignoffs", + dest="dco-allow-dcosignoffs", + help="Allow reading and writing legacy DCO Signoff files") + parser.add_argument("-l", "--log", dest="loglevel", default="error", + choices=['debug', 'info', 'warning', 'error', 'critical'], help="logging level") + parser.add_argument("--logfile", default='debug.log', help="Name for the log file") args = parser.parse_args() + levels = { + 'critical': logging.CRITICAL, # errors that mean an immediate stop + 'error': logging.ERROR, # general errors that will effect the output + 'warn': logging.WARNING, # errors that can be caught and corrected + 'warning': logging.WARNING, + 'info': logging.INFO, # infomational messages + 'debug': logging.DEBUG # messages to help debug things misbehaving ;-) + } + handlers = [logging.FileHandler(args.logfile,mode="w")] + handlers.append(logging.StreamHandler(sys.stdout)) + logging.basicConfig( + level=levels.get(args.loglevel.lower()), + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=handlers + ) + config = {} if args.configfile: config = yaml.safe_load(args.configfile) @@ -57,7 +88,7 @@ def main(): repos = [Repo(args.repo)] for repoObj in repos: - print("Searching repo {}...".format(repoObj.name)) + logging.getLogger().info(f"Searching repo {repoObj.name}") if 'dco' in config or args.dco: if 'dco' in config and 'prior_commits_directory' in config['dco']: repoObj.prior_commits_dir = config['dco']['prior_commits_directory'] @@ -67,8 +98,4 @@ def main(): repoObj.load_past_signoffs() repoObj.scan() - print("This took " + str(datetime.now() - startTime)) - - -if __name__ == '__main__': - main() + logging.getLogger().info("This took {} seconds".format(str(datetime.now() - startTime))) diff --git a/contrib_check/org.py b/contrib_check/org.py index 83cb2a6..aeb5da2 100644 --- a/contrib_check/org.py +++ b/contrib_check/org.py @@ -9,20 +9,13 @@ import socket import re import time +import logging from github import Github, GithubException, RateLimitExceededException from .repo import Repo class Org(): - __org_name = '' - __org_type = 'github' - ignore_repos = [] - only_repos = [] - skip_archived = True - lazy_load = False - repos = [] - def __init__(self, org_name, org_type='github', ignore_repos=None, only_repos=None, skip_archived=True, load_repos=True): # Properly bound mutable states to this exact instance self.ignore_repos = ignore_repos if ignore_repos is not None else [] @@ -58,33 +51,49 @@ def org_type(self, org_type): raise Exception('Github token is not defined. Set GITHUB_TOKEN environment variable to a valid Github token') self.__org_type = org_type + def _should_skip_repo(self, gh_repo) -> bool: + """Helper method to handle repository filtering logic. + + Extracting this removes nested conditionals from the main loop, + drastically reducing cognitive complexity. + """ + if self.ignore_repos and gh_repo.name in self.ignore_repos: + return True + if self.only_repos and gh_repo.name not in self.only_repos: + return True + if self.skip_archived and gh_repo.archived: + return True + return False + def reload_repos(self): self.repos = [] - if self.org_type == 'github': - try: - gh_repos = self._get_github_repos_for_org() - for gh_repo in gh_repos: - if self.ignore_repos and gh_repo.name in self.ignore_repos: - continue - if self.only_repos and gh_repo.name not in self.only_repos: - continue - if self.skip_archived and gh_repo.archived: - continue - self.repos.append(Repo(gh_repo.html_url)) - except RateLimitExceededException: - print("Sleeping until we get past the API rate limit....") - time.sleep(60) - except GithubException as e: - if e.status == 502: - print("Server error - retrying...") - else: - print(e.data) - except socket.timeout: - print("Server error - retrying...") + # Guard clause: Exit early if it's not a GitHub org type + if self.org_type != 'github': + return self.repos + + try: + gh_repos = self._get_github_repos_for_org() + for gh_repo in gh_repos: + if self._should_skip_repo(gh_repo): + continue + self.repos.append(Repo(gh_repo.html_url)) + logging.getLogger().info(f"Adding repo {gh_repo.html_url}") + + except RateLimitExceededException: + logging.getLogger().info("Sleeping until we get past the API rate limit....") + time.sleep(60) + except GithubException as e: + if e.status == 502: + logging.getLogger().error("Server error - retrying...") + else: + logging.getLogger().error(e.data) + except socket.timeout: + logging.getLogger().error("Server error - retrying...") return self.repos def _get_github_repos_for_org(self): g = Github(login_or_token=os.environ['GITHUB_TOKEN'], per_page=1000) + logging.getLogger().info(f"Loading repos for {self.org_name}") return g.get_organization(self.org_name).get_repos() diff --git a/contrib_check/repo.py b/contrib_check/repo.py index ed5982a..0d160d3 100644 --- a/contrib_check/repo.py +++ b/contrib_check/repo.py @@ -15,6 +15,7 @@ import csv import re import shutil +import logging from alive_progress import alive_bar import git @@ -64,6 +65,7 @@ def __init__(self, repo_path): self.csv_filename = f"{self.name}.csv" self.load_remediation_commits() + self.load_past_signoffs() def load_past_signoffs(self, dco_signoffs_directories=None): if dco_signoffs_directories is None: @@ -77,7 +79,7 @@ def load_past_signoffs(self, dco_signoffs_directories=None): content = content_file.read() self.past_signoffs.append(content) except (ValueError, AttributeError): - print("...invalid or empty repo - skipping") + logging.getLogger().error(f"{self.git_repo_object} is an invalid or empty repo - skipping") return False return True @@ -131,6 +133,7 @@ def __del__(self): self.close() def write_error(self, commit, error_type): + logging.getLogger().error(f"Found error '{error_type}' in commit {commit.git_commit_object.hexsha}") if not self.__csv_writer: raise RuntimeError("CSV file has not been initialized via csv_filename setter.") diff --git a/pyproject.toml b/pyproject.toml index d73037e..eb2c337 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,9 @@ pytest-cov = "^7.1.0" [tool.pytest.ini_options] testpaths = ["tests"] +log_file = "pytest.log" +log_file_level = "DEBUG" +log_auto_indent = true [tool.coverage.run] source = ["contrib_check"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..1f7c73b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# +# Copyright this project and it's contributors +# SPDX-License-Identifier: Apache-2.0 +# +# encoding=utf8 + +import logging +import pytest + +logger = logging.getLogger(__name__) + +@pytest.hookimpl(tryfirst=True) +def pytest_runtest_setup(item): + """Executes automatically right before any test begins running. + + 'item' contains all the metadata about the test being executed. + """ + # item.nodeid looks like: tests/test_repo.py::TestRepoInitGithub::test_csv_filename + logger.info(f"============ STARTING TEST: {item.nodeid} ============") diff --git a/tests/test_commit.py b/tests/test_commit.py index 7033b22..18e11ec 100644 --- a/tests/test_commit.py +++ b/tests/test_commit.py @@ -12,7 +12,6 @@ from contrib_check.commit import Commit - def _make_mock_repo(tree_raises_key_error=True): """Return a mock repo whose tree[] raises KeyError by default (no dco.yml).""" mock_repo = Mock() diff --git a/tests/test_gitremoteprogress.py b/tests/test_gitremoteprogress.py index 0ecb056..2c0534f 100644 --- a/tests/test_gitremoteprogress.py +++ b/tests/test_gitremoteprogress.py @@ -7,7 +7,9 @@ import unittest from unittest.mock import MagicMock, patch + import git + from contrib_check.repo import GitRemoteProgress class TestGitRemoteProgressCoverage(unittest.TestCase): diff --git a/tests/test_org.py b/tests/test_org.py index 784d0e8..3b4d28b 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -9,7 +9,9 @@ import os import unittest from unittest.mock import MagicMock, patch + from github import RateLimitExceededException, GithubException + from contrib_check.org import Org class TestOrgCoverage(unittest.TestCase): diff --git a/tests/test_repo.py b/tests/test_repo.py index e32bc21..4052563 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -16,28 +16,35 @@ from contrib_check.repo import Repo from contrib_check.commit import Commit - def _make_repo_github(url="https://github.com/foo/bar"): - """Construct a GitHub-style Repo with clone_from mocked out.""" - with patch.object(git.Repo, 'clone_from') as mock_clone: - mock_git_repo = Mock() - mock_git_repo.iter_commits.return_value = [] - mock_git_repo.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) - mock_clone.return_value = mock_git_repo - repo = Repo(url) - return repo + with patch('git.Repo.clone_from') as mock_clone: + # 1. Setup the basic repo object mock + mock_repo_inst = MagicMock() + mock_clone.return_value = mock_repo_inst + # 2. Fix the iteration bug: force the tree to look like an empty iterable container + mock_repo_inst.head.commit.tree = [] + + # 3. Handle commit iteration mock requirements for other functions + mock_repo_inst.iter_commits.return_value = [] + + repo = Repo(url) + return repo def _make_repo_local(path="."): - """Construct a local Repo with git.Repo mocked out.""" - mock_git_repo = Mock() - mock_git_repo.iter_commits.return_value = [] - mock_git_repo.head.commit.tree.__getitem__ = Mock(side_effect=KeyError) - with patch.object(git.Repo, '__init__', return_value=None): - with patch('contrib_check.repo.git.Repo', return_value=mock_git_repo): - repo = Repo(path) - return repo + with patch('git.Repo') as mock_repo_class: + # 1. Setup the basic repo object mock + mock_repo_inst = MagicMock() + mock_repo_class.return_value = mock_repo_inst + + # 2. Fix the iteration bug: force the tree to look like an empty iterable container + mock_repo_inst.head.commit.tree = [] + + # 3. Handle commit iteration mock requirements + mock_repo_inst.iter_commits.return_value = [] + repo = Repo(path) + return repo class TestRepoInitGithub(unittest.TestCase):