# Copyright (c) 2011, Apple Inc. All rights reserved. # Copyright (c) 2009, 2011, 2012 Google Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # WebKit's Python module for committer and reviewer validation. 
import fnmatch
import json

from webkitpy.common.editdistance import edit_distance
from webkitpy.common.memoized import memoized
from webkitpy.common.system.filesystem import FileSystem

# The list of contributors has been moved to contributors.json

# Types treated as "a single string" when an argument may be either one string
# or a list of strings. On Python 2 this must include unicode, otherwise a lone
# unicode email/nickname would be iterated character by character.
try:
    _STRING_TYPES = (basestring,)
except NameError:  # Python 3 has no basestring.
    _STRING_TYPES = (str,)


class Contributor(object):
    """A person known to the project, identified by name, emails and IRC nicknames.

    Attributes:
        full_name: The name passed to the constructor.
        emails: List of lower-cased email addresses (never empty).
        irc_nicknames: List of IRC nicknames, or None when none were given.
        can_commit: False for a plain contributor; subclasses override.
        can_review: False for a plain contributor; subclasses override.
    """

    def __init__(self, name, email_or_emails, irc_nickname_or_nicknames=None):
        """Create a contributor.

        Args:
            name: Full name; must be truthy.
            email_or_emails: A single email string or a list of them; must be truthy.
            irc_nickname_or_nicknames: A single nickname string, a list of
                nicknames, or None.
        """
        assert(name)
        assert(email_or_emails)
        self.full_name = name
        if isinstance(email_or_emails, _STRING_TYPES):
            emails = [email_or_emails]
        else:
            emails = email_or_emails
        # Emails are case-insensitive. Build a real list (not a lazy map object)
        # so that indexing and repeated iteration work on every Python version.
        self.emails = [email.lower() for email in emails]
        if isinstance(irc_nickname_or_nicknames, _STRING_TYPES):
            self.irc_nicknames = [irc_nickname_or_nicknames]
        else:
            self.irc_nicknames = irc_nickname_or_nicknames
        self.can_commit = False
        self.can_review = False

    def bugzilla_email(self):
        """Return the first email, which is used as the bugzilla login."""
        # FIXME: We're assuming the first email is a valid bugzilla email,
        # which might not be right.
        return self.emails[0]

    def __str__(self):
        """Return the '"Name" <email>' form as the native str type."""
        text = self.__unicode__()
        # On Python 2 the native str is a byte string, so unicode text is
        # encoded as UTF-8; on Python 3 the text already is a str.
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    def __unicode__(self):
        """Return '"Name" <email>' using the full name and the first email."""
        return '"%s" <%s>' % (self.full_name, self.emails[0])

    def contains_string(self, search_string):
        """Return True if search_string is a case-insensitive substring of the
        full name, of any IRC nickname, or of any email."""
        needle = search_string.lower()
        if needle in self.full_name.lower():
            return True
        if any(needle in nickname.lower() for nickname in self.irc_nicknames or []):
            return True
        # Emails were lower-cased in the constructor.
        return any(needle in email for email in self.emails)

    def matches_glob(self, glob_string):
        """Return True if glob_string (fnmatch syntax) matches the full name,
        any IRC nickname, or any email."""
        if fnmatch.fnmatch(self.full_name, glob_string):
            return True
        if any(fnmatch.fnmatch(nickname, glob_string) for nickname in self.irc_nicknames or []):
            return True
        return any(fnmatch.fnmatch(email, glob_string) for email in self.emails)


class Committer(Contributor):
    """A contributor with can_commit set to True."""

    def __init__(self, name, email_or_emails, irc_nickname=None):
        Contributor.__init__(self, name, email_or_emails, irc_nickname)
        self.can_commit = True


class Reviewer(Committer):
    """A committer with can_review set to True."""

    def __init__(self, name, email_or_emails, irc_nickname=None):
        Committer.__init__(self, name, email_or_emails, irc_nickname)
        self.can_review = True


class CommitterList(object):
    """Lookup helper over the sets of contributors, committers and reviewers."""

    # Committers and reviewers are passed in to allow easy testing
    def __init__(self, committers=None, reviewers=None, contributors=None):
        """Build the list from the given accounts, or from contributors.json.

        Args:
            committers: List of Committer objects (None means empty).
            reviewers: List of Reviewer objects (None means empty).
            contributors: List of plain Contributor objects (None means empty).

        When all three are empty, the data is loaded via load_json().
        """
        # FIXME: These arguments only exist for testing. Clean it up.
        # None is used instead of a mutable [] default so that no list object
        # is shared between instances.
        committers = [] if committers is None else committers
        reviewers = [] if reviewers is None else reviewers
        contributors = [] if contributors is None else contributors

        if not (committers or reviewers or contributors):
            loaded_data = self.load_json()
            contributors = loaded_data['Contributors']
            committers = loaded_data['Committers']
            reviewers = loaded_data['Reviewers']

        # Each list is a superset of the next: reviewers are also committers,
        # and committers are also contributors.
        self._contributors = contributors + committers + reviewers
        self._committers = committers + reviewers
        self._reviewers = reviewers
        # Lookup tables, filled lazily on first use.
        self._contributors_by_name = {}
        self._accounts_by_email = {}
        self._accounts_by_login = {}

    @staticmethod
    @memoized
    def load_json():
        """Read contributors.json (next to the webkitpy.common.config module)
        and return a dict with 'Contributors', 'Committers' and 'Reviewers'
        keys, each mapping to a list of the corresponding objects."""
        filesystem = FileSystem()
        config_directory = filesystem.dirname(filesystem.path_to_module('webkitpy.common.config'))
        json_path = filesystem.join(config_directory, 'contributors.json')
        contributors = json.loads(filesystem.read_text_file(json_path))

        def build(account_class, section):
            # .items() (rather than .iteritems()) works on Python 2 and 3.
            return [account_class(name, data.get('emails'), data.get('nicks'))
                    for name, data in contributors[section].items()]

        return {
            'Contributors': build(Contributor, 'Contributors'),
            'Committers': build(Committer, 'Committers'),
            'Reviewers': build(Reviewer, 'Reviewers'),
        }

    def contributors(self):
        """Return every known account (contributors, committers and reviewers)."""
        return self._contributors

    def committers(self):
        """Return the committers, including reviewers."""
        return self._committers

    def reviewers(self):
        """Return the reviewers."""
        return self._reviewers

    def _name_to_contributor_map(self):
        """Return the lazily built map from lower-cased full name to account."""
        if not self._contributors_by_name:
            for contributor in self._contributors:
                assert(contributor.full_name)
                key = contributor.full_name.lower()
                assert(key not in self._contributors_by_name)  # We should never have duplicate names.
                self._contributors_by_name[key] = contributor
        return self._contributors_by_name

    def _email_to_account_map(self):
        """Return the lazily built map from (lower-cased) email to account."""
        if not self._accounts_by_email:
            for account in self._contributors:
                for email in account.emails:
                    assert(email not in self._accounts_by_email)  # We should never have duplicate emails.
                    self._accounts_by_email[email] = account
        return self._accounts_by_email

    def _login_to_account_map(self):
        """Return the lazily built map from bugzilla login to account."""
        if not self._accounts_by_login:
            for account in self._contributors:
                if account.emails:
                    login = account.bugzilla_email()
                    assert(login not in self._accounts_by_login)  # We should never have duplicate emails.
                    self._accounts_by_login[login] = account
        return self._accounts_by_login

    def _committer_only(self, record):
        """Return record if it can commit (or is falsy), otherwise None."""
        if record and not record.can_commit:
            return None
        return record

    def _reviewer_only(self, record):
        """Return record if it can review (or is falsy), otherwise None."""
        if record and not record.can_review:
            return None
        return record

    def contributor_by_irc_nickname(self, irc_nickname):
        """Return the contributor owning irc_nickname, or None.

        An exact (case-sensitive) match wins; if there is none, the first
        contributor whose nickname matches case-insensitively is returned.
        """
        if not irc_nickname:
            return None
        nickname_in_lowercase = irc_nickname.lower()
        case_insensitive_match = None
        for contributor in self.contributors():
            nicknames = contributor.irc_nicknames
            if not nicknames:
                continue
            if irc_nickname in nicknames:
                return contributor
            if case_insensitive_match is None and any(
                    nickname.lower() == nickname_in_lowercase for nickname in nicknames):
                # Remember the first loose match, but keep scanning in case a
                # later contributor matches exactly.
                case_insensitive_match = contributor
        return case_insensitive_match

    def contributors_by_search_string(self, string):
        """Return the contributors matching string as a glob; if there are
        none, return those containing string as a substring. Always a list."""
        glob_matches = [contributor for contributor in self.contributors()
                        if contributor.matches_glob(string)]
        if glob_matches:
            return glob_matches
        return [contributor for contributor in self.contributors()
                if contributor.contains_string(string)]

    def contributors_by_email_username(self, string):
        """Return the contributors having an email whose part before '@' is string."""
        prefix = string + '@'
        return [contributor for contributor in self.contributors()
                if any(email.startswith(prefix) for email in contributor.emails)]

    def _contributor_name_shorthands(self, contributor):
        """Return first name, last name, 'FirstL' and 'First L' for a
        contributor whose full name contains a space; otherwise an empty list."""
        if ' ' not in contributor.full_name:
            return []
        split_fullname = contributor.full_name.split()
        first_name = split_fullname[0]
        last_name = split_fullname[-1]
        return first_name, last_name, first_name + last_name[0], first_name + ' ' + last_name[0]

    def _tokenize_contributor_name(self, contributor):
        """Return the lower-cased strings used for fuzzy matching: the full
        name, each word of it, and every IRC nickname longer than 5 characters."""
        full_name_in_lowercase = contributor.full_name.lower()
        tokens = [full_name_in_lowercase] + full_name_in_lowercase.split()
        if contributor.irc_nicknames:
            return tokens + [nickname.lower() for nickname in contributor.irc_nicknames if len(nickname) > 5]
        return tokens

    def contributors_by_fuzzy_match(self, string):
        """Find the contributors best matching string.

        Returns:
            A (contributors, distance) tuple. distance is 0 for any kind of
            exact match, otherwise the minimal edit distance found. When
            nothing matches, the result is ([], len(string)).
        """
        string_in_lowercase = string.lower()

        # 1. Exact match for fullname, email and irc_nicknames
        account = (self.contributor_by_name(string_in_lowercase)
                   or self.contributor_by_email(string_in_lowercase)
                   or self.contributor_by_irc_nickname(string_in_lowercase))
        if account:
            return [account], 0

        # 2. Exact match for email username (before @)
        accounts = self.contributors_by_email_username(string_in_lowercase)
        if accounts and len(accounts) == 1:
            return accounts, 0

        # 3. Exact match for first name, last name, and first name + initial combinations such as "Dan B" and "Tim H"
        # Note: this step compares the string as given (not lower-cased).
        accounts = [contributor for contributor in self.contributors()
                    if string in self._contributor_name_shorthands(contributor)]
        if accounts and len(accounts) == 1:
            return accounts, 0

        # 4. Finally, fuzzy-match using edit-distance
        string = string_in_lowercase
        closest_contributors = []
        # Floor division keeps the threshold an integer on every Python version.
        min_distance = len(string) // 2 - 1
        for contributor in self.contributors():
            tokens = self._tokenize_contributor_name(contributor)
            # Tokens whose length differs by more than the current best
            # distance cannot beat it, so they are skipped.
            editdistances = [edit_distance(token, string) for token in tokens
                             if abs(len(token) - len(string)) <= min_distance]
            if not editdistances:
                continue
            distance = min(editdistances)
            if distance == min_distance:
                closest_contributors.append(contributor)
            elif distance < min_distance:
                closest_contributors = [contributor]
                min_distance = distance
        if not closest_contributors:
            return [], len(string)
        return closest_contributors, min_distance

    def contributor_by_email(self, email):
        """Return the account owning email (case-insensitive), or None."""
        return self._email_to_account_map().get(email.lower()) if email else None

    def contributor_by_name(self, name):
        """Return the account with the given full name (case-insensitive), or None."""
        return self._name_to_contributor_map().get(name.lower()) if name else None

    def committer_by_email(self, email):
        """Like contributor_by_email, but None unless the account can commit."""
        return self._committer_only(self.contributor_by_email(email))

    def committer_by_name(self, name):
        """Like contributor_by_name, but None unless the account can commit."""
        return self._committer_only(self.contributor_by_name(name))

    def reviewer_by_email(self, email):
        """Like contributor_by_email, but None unless the account can review."""
        return self._reviewer_only(self.contributor_by_email(email))

    def reviewer_by_name(self, email):
        """Like contributor_by_name, but None unless the account can review.

        The parameter is a full name; it is called 'email' only to keep the
        existing keyword interface unchanged.
        """
        return self._reviewer_only(self.contributor_by_name(email))