#!/usr/bin/env perl

# Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
# at the Lawrence Livermore National Laboratory. All Rights reserved. See files
# LICENSE and NOTICE for details. LLNL-CODE-806117.
#
# This file is part of the MFEM library. For more information and source code
# availability visit https://mfem.org.
#
# MFEM is free software; you can redistribute it and/or modify it under the
# terms of the BSD-3 license. We welcome feedback and contributions, see file
# CONTRIBUTING.md for details.

use strict;
use warnings;
use File::Basename;
use Cwd;

# Print usage information and exit with status code passed as argument.
#
# NOTE(review): the start of this heredoc was garbled in the reviewed copy
# (everything between "<" and "] {mfem_dir}" was missing, as was the text
# right after "-b" in the option list). The "<<EOF;" marker, the synopsis line
# and the two "<branch>" placeholders are reconstructed from the surviving
# option descriptions -- confirm against the upstream script.
sub usage {
    # print, not printf: the text interpolates $0, which must not be
    # interpreted as a format string.
    print STDOUT <<EOF;

   $0 [-h|--help] [-b <branch>] {mfem_dir}

   where: {mfem_dir}  is the MFEM source directory [default value: ../..]
          -b <branch> is the branch to check [default: HEAD]
          -h|--help   prints this usage information and exits

   This script checks if the current branch history, defined as the commits that
   will be merged in master (those shown in a GitHub PR), contains unusually
   large files, unusually large number of changes in a commit, unusually large
   number of commits, etc.

   Example usage: $0

EOF
    exit $_[0];
}

my $mfem_dir = "../..";
my $branch   = "HEAD";

# Parse the command line. Test with defined() so that an argument such as "0"
# does not silently end option processing.
while (defined(my $opt = shift @ARGV)) {
    next if $opt eq '';
    if ($opt eq "-h" || $opt eq "--help") {
        usage(0);
    }
    elsif ($opt eq "-b") {
        $branch = shift @ARGV;
        if (!defined $branch || $branch eq '') {
            print STDERR "Error: option -b requires a branch name\n";
            usage(1);
        }
    }
    else {
        $mfem_dir = $opt;
    }
}

my $cur_dir = getcwd;
if (!chdir $mfem_dir) {
    print STDERR "Error: invalid value for mfem_dir: $mfem_dir\n";
    usage(1);
}

# Maximum number of acceptable commits in the branch
my $branch_max_commits = 200;

# Maximum number of acceptable files changed in any commit in the branch
my $commit_max_files_changed = 50;

# Maximum acceptable size (in K) of any git blob in the branch
my $max_blob_kb = 200;

# Maximum acceptable size (in K) of any commit in the branch
my $max_commit_kb = 400;

# Maximum acceptable size (in K) of the sum of the commit sizes in the branch
my $max_branch_kb = 1000;

my $status = 0; # Return code

# Get SHA hash of all commits in this branch. The list form of open() runs git
# directly, so the user-supplied branch name never passes through a shell.
my @commits;
if (open(my $git_log, '-|', 'git', 'log', '--pretty=format:%H',
         "master..$branch")) {
    chomp(@commits = <$git_log>);
    # close() on a pipe returns false when the command exited with an error.
    close($git_log)
        or print STDERR "Warning: 'git log master..$branch' failed; " .
                        "the list of commits may be incomplete.\n";
}
else {
    print STDERR "Warning: unable to run git: $!\n";
}

# Check if total number of commits in this branch exceeds the maximum allowable
my $ncommits = scalar @commits;
if ($ncommits > $branch_max_commits) {
    printf STDERR "\033[31mNumber of commits in this branch (%d) exceeds maximum allowable (%d).\033[0m\n",
        $ncommits, $branch_max_commits;
    $status = 1;
}

# Print SHA in blue
sub print_sha {
    printf STDERR "\033[34m  ^---> SHA: %s\033[0m\n", $_[0];
}

# Format a size given in bytes as a string with two decimals and a unit from
# B up to PB, e.g. formatSize(1536) returns "1.50 KB".
sub formatSize {
    my $size  = shift;
    my @units = qw(B KB MB GB TB PB);
    my $exp   = 0;
    # Stop at the last unit so the index can never run past the end of @units.
    while ($size >= 1024 && $exp < $#units) {
        $size /= 1024;
        $exp++;
    }
    return sprintf("%.2f %s", $size, $units[$exp]);
}

# Return the first integer found in the output of a command, or 0 when the
# command printed nothing usable (e.g. because it failed).
sub first_integer {
    my ($output) = @_;
    return (defined $output && $output =~ /(\d+)/) ? int($1) : 0;
}

# Size in bytes of the git object with the given hash.
sub object_size {
    my ($object) = @_;
    return first_integer(scalar `git cat-file -s $object`);
}

# Size in bytes of the gzip'ed zero-context diff between two blobs, used as a
# proxy of the git storage required by a modification.
sub packed_diff_size {
    my ($src, $dst) = @_;
    return first_integer(
        scalar `git diff -U0 --binary $src $dst | gzip -c | wc -c`);
}

# Loop over each commit in this branch, and check for large diffs
my $total_size = 0;
foreach my $sha (@commits) {
    my @blobs = `git diff-tree -r -c --root --no-commit-id --find-renames $sha`;
    my $nfiles_changed = scalar @blobs;
    if ($nfiles_changed > $commit_max_files_changed) {
        printf STDERR "\033[31mNumber of files changed in one commit (%d) exceeds maximum allowable (%d).\033[0m\n",
            $nfiles_changed, $commit_max_files_changed;
        print_sha($sha);
        $status = 1;
    }

    # Accumulate size of each commit in bytes
    my $commit_size = 0;
    foreach my $blob (@blobs) {
        # A raw diff-tree line is "<modes and hashes> <status>" followed by a
        # tab and one path (two, tab-separated, for renames and copies). Split
        # on the tab first so that two-path entries and paths containing
        # spaces do not change the number of metadata fields.
        chomp(my $line = $blob);
        my ($meta, @paths) = split /\t/, $line;
        my @fields = split ' ', (defined $meta ? $meta : '');

        my $fname     = @paths ? $paths[-1] : "(no-filename)";
        my $blob_size = 0;

        if (@fields == 5) { # 0 or 1 parents
            my (undef, undef, $src, $dst, $mode) = @fields;
            if ($mode eq "A" || $mode =~ m/\AC\d*\z/) {
                # File was added or copied: the whole blob is new
                $blob_size += object_size($dst);
            }
            elsif ($mode =~ m/\A[MR]\d*\z/) {
                # File was modified or renamed (at least 50% similarity), use
                # the gzip'ed diff as a proxy of the required git storage
                $blob_size += packed_diff_size($src, $dst);
            }
            elsif ($mode eq "D" || $mode eq "T" || $mode eq "U") {
                # Deleted, type-changed and unmerged entries are not counted
            }
            else {
                die "Unknown git status letter: $mode, commit: $sha, file: $fname.\n";
            }
        }
        elsif (@fields == 7) { # 2 parents
            my (undef, undef, undef, $src1, $src2, $dst, $mode) = @fields;
            if ($mode eq "AA") {
                # File was added
                $blob_size += object_size($dst);
            }
            elsif ($mode eq "DD") {
                # File was deleted: nothing to count
            }
            elsif ($mode eq "MM" || $mode eq "MR" || $mode eq "RM") {
                # File was modified, use the gzip'ed diff as a proxy of the
                # required git storage; keep the smaller of the two diffs
                my $sz1 = packed_diff_size($src1, $dst);
                my $sz2 = packed_diff_size($src2, $dst);
                $blob_size += $sz1 < $sz2 ? $sz1 : $sz2;
            }
            elsif ($mode =~ m/\AA[MR]/) {
                # Added relative to the first parent: diff against the second
                $blob_size += packed_diff_size($src2, $dst);
            }
            elsif ($mode =~ m/\A[MR]A/) {
                # Added relative to the second parent: diff against the first
                $blob_size += packed_diff_size($src1, $dst);
            }
            else {
                die "Unknown git status letter: $mode, commit: $sha, file: $fname.\n\t"
            }
        }

        if ($blob_size > $max_blob_kb*1024) {
            $status = 1;
            printf STDERR "\033[31mLarge change of size %s in file %s.\033[0m\n",
                formatSize($blob_size), $fname;
            print_sha($sha);
        }
        $commit_size += $blob_size;
    }

    if ($commit_size > $max_commit_kb*1024) {
        $status = 1;
        printf STDERR "\033[31mLarge commit of size %s.\033[0m\n",
            formatSize($commit_size);
        print_sha($sha);
    }
    $total_size += $commit_size;
}

if ($total_size > $max_branch_kb*1024) {
    printf STDERR "\033[31mLarge total branch size: %s.\033[0m\n",
        formatSize($total_size);
    $status = 1;
}

# NOTE(review): the rest of this block (the heredoc written to the .msg file
# and whatever follows it) lies beyond the end of the reviewed excerpt; the
# visible statements below are left exactly as they were.
if ($status) {
    printf STDERR "\033[36mBranch $branch has errors.\033[0m\n";
    chdir $cur_dir;
    my $testname = basename $0;
    open(my $f, '>', "$testname.msg");
    print $f <