#!/bin/sh
# Check the format of GNU Emacs change log entries.

# Copyright 2014-2021 Free Software Foundation, Inc.

# This file is part of GNU Emacs.

# GNU Emacs is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# GNU Emacs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

# Written by Paul Eggert.

# Default to plain awk, but switch to gawk when it is installed,
# because gawk handles NUL bytes properly.
awk=awk
if type gawk >/dev/null 2>&1; then
  awk=gawk
fi

# Make sure awk runs in a locale where the UTF-8 check works.
# The probe hands awk U+00A2 CENT SIGN (two bytes in UTF-8) followed
# by "@" and asks for everything from the second character on; the
# answer is "@" only if awk treated the cent sign as one character.
cent_sign_utf8_format='\302\242\n'
cent_sign=`printf "$cent_sign_utf8_format"`
print_at_sign='BEGIN {print substr("'"$cent_sign"'@", 2)}'
at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
case "$at_sign" in
  @)
    # The current locale already works; leave LC_ALL alone.
    ;;
  *)
    # Retry under en_US.UTF-8, and settle for the C locale if that
    # does not work either.
    at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null`
    case "$at_sign" in
      @) LC_ALL=en_US.UTF-8 ;;
      *) LC_ALL=C ;;
    esac
    export LC_ALL
    ;;
esac

# Check the log entry in the file named by $1.
# The awk program reads the message from standard input, prints one
# diagnostic per violated rule, and exits nonzero if any rule failed.
# If the message is otherwise acceptable but mentions http: or ftp:
# URLs on gnu.org or fsf.org, the file is rewritten in place so that
# they use https: instead.
# at_sign and cent_sign carry the results of the locale probe above.
exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
  BEGIN {
    # These regular expressions assume traditional Unix unibyte behavior.
    # They are needed for old or broken versions of awk, e.g.,
    # mawk 1.3.3 (1996), or gawk on MSYS (2015), and/or for systems that
    # cannot use UTF-8 as the codeset for the locale.
    space = "[ \f\n\r\t\v]"
    non_space = "[^ \f\n\r\t\v]"
    # The non_print below rejects control characters and surrogates
    # UTF-8 for: 0x01-0x1f 0x7f   0x80-0x9f  0xd800-0xdbff  0xdc00-0xdfff
    non_print = "[\1-\37\177]|\302[\200-\237]|\355[\240-\277][\200-\277]"

    # Prefer POSIX regular expressions if available, as they do a
    # better job of checking.  Similarly, prefer POSIX negated
    # expressions if UTF-8 also works.
    if (" " ~ /[[:space:]]/) {
      space = "[[:space:]]"
      if (at_sign == "@" && cent_sign ~ /^[[:print:]]$/) {
        non_space = "[^[:space:]]"
        non_print = "[^[:print:]]"
      }
    }
    c_lower = "abcdefghijklmnopqrstuvwxyz"
    # Matches an http: or ftp: URL whose host is gnu.org, fsf.org,
    # or a subdomain of either.
    unsafe_gnu_url = "(http|ftp)://([" c_lower ".]*\\.)?(gnu|fsf)\\.org"
  }

  # Remember every raw input line so that END can rewrite the file.
  { input[NR] = $0 }

  /^#/ {
    # Ignore every line after a scissors line.
    if (/^# *---* *(>[8%]|[8%]<) *---* *$/) { exit }

    # Ignore comment lines.
    next
  }

  # A line that does not even match ".*" presumably contains bytes
  # that awk cannot decode in the current locale.
  !/^.*$/ {
    print "Invalid character (not UTF-8) in commit message"
    status = 1
  }

  # Skip blank lines that precede the first line of the message.
  nlines == 0 && $0 !~ non_space { next }

  # nlines counts message lines, starting with the first nonblank one.
  { nlines++ }

  nlines == 1 {
    # Ignore special markers used by "git rebase --autosquash".
    if (! sub(/^fixup! /, ""))
      sub(/^squash! /, "")

    if ($0 ~ "^" space) {
      print "White space at start of commit message'\''s first line"
      status = 1
    }
  }

  nlines == 2 && $0 ~ non_space {
    print "Nonempty second line in commit message"
    status = 1
  }

  {
    # Expand tabs to spaces for length calculations etc.
    # Each tab is replaced by enough spaces to reach the next
    # multiple-of-8 column.
    while (match($0, /\t/)) {
      before_tab = substr($0, 1, RSTART - 1)
      after_tab = substr($0, RSTART + 1)
      $0 = sprintf("%s%*s%s", before_tab, 8 - (RSTART - 1) % 8, "", after_tab)
    }
  }

  # The 78-character limit applies only to lines containing white
  # space; a line without any is subject only to the 140 limit below.
  78 < length && $0 ~ space {
    print "Line longer than 78 characters in commit message"
    status = 1
  }

  140 < length {
    print "Word longer than 140 characters in commit message"
    status = 1
  }

  /^Signed-off-by: / {
    print "'\''Signed-off-by:'\'' in commit message"
    status = 1
  }

  # Not an error: just note that END should rewrite the URLs.
  $0 ~ unsafe_gnu_url {
    needs_rewriting = 1
  }

  $0 ~ non_print {
    print "Unprintable character in commit message"
    status = 1
  }

  END {
    if (nlines == 0) {
      print "Empty commit message"
      status = 1
    }
    # Rewrite the file only when every check passed.
    if (status == 0 && needs_rewriting) {
      for (i = 1; i <= NR; i++) {
        line = input[i]
        while (match(line, unsafe_gnu_url)) {
          prefix = substr(line, 1, RSTART - 1)
          suffix = substr(line, RSTART)
          # Drop the old scheme and its colon: "ftp:" is 4 characters
          # and "http:" is 5, so resume at position 5 or 6.
          line = prefix "https:" substr(suffix, 5 + (suffix ~ /^http:/))
        }
        print line >file
      }
      if (close(file) != 0) {
        print "Cannot rewrite: " file
        status = 1
      }
    }
    if (status != 0) {
      # The quotes must be written as '\'' here, because this whole
      # program sits inside a shell single-quoted string.
      print "Commit aborted; please see the file '\''CONTRIBUTE'\''"
    }
    exit status
  }
' <"$1"