#!/usr/bin/env perl

# tdiff - diff for terminals (colorized, with UTF-8 conversion)

use strict;

# Program name ($proc) and version string ($vers: revision number, date
# and time) extracted from the '$Id: ... $' keyword string below; the
# script dies at startup if this string does not have the expected form.
# NOTE(review): the pattern starts with "^.Id" rather than a literal
# dollar sign, presumably so that the pattern itself is not treated as a
# keyword by the version control system -- confirm.
my ($proc,$vers) = '$Id: tdiff 171413 2024-09-01 12:25:23Z vinc17/qaa $'
  =~ /^.Id: (\S+) (\d+ \d{4}-\d\d-\d\d \d\d:\d\d:\d\d)Z / or die;

# Copyright and license notice, printed after the program name and
# version by "tdiff --version".
my $copyright = <<'EOF';
Copyright 2006-2017 Vincent Lefevre <vincent@vinc17.net>.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
EOF

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, see <https://www.gnu.org/licenses/>.

# A short documentation can be obtained with "tdiff --help"
# or can be read directly in the source code below.

# History:
# 2006-11-10: First version (unified diff format only).
# 2006-11-13: Added hunk detection and automatic conversion into UTF-8.
# 2007-02-12: Added support for copied context.
# 2007-02-14: Added info color (for patches produced with "diff -p").
# 2007-02-17: Added support for config files.
# 2007-02-18: Improved conversion into UTF-8. First released version.
# 2008-04-15: Improved open with pipe (forgot to test if fork failed).
# 2010-06-17: Better UTF-8 charmap detection (with I18N::Langinfo).
# 2010-11-04: Conversion into UTF-8: allow hunks to use mixed encodings.
# 2012-05-31: Use 'sgr0:7' by default for normal text.
# 2012-06-18: Major update, with support of Subversion 1.7 property changes.
#             License changed to GNU GPLv3+.
# 2017-07-20: Use autoflush to mitigate potential race conditions.
# 2024-09-01: Updated copyright notice and URLs.

# The latest version of tdiff can be obtained at the following URL:
#   https://www.vinc17.net/unix/#tdiff
# or
#   http://www.vinc17.org/unix/#tdiff

# "tdiff --help": print the short documentation below and exit.
if ($ARGV[0] eq '--help')
  {
    print <<'EOF';
Short documentation (more information by reading the source):
* Usage: tdiff [diff arguments]
      or tdiff --help
      or tdiff --version
* Examples:
    tdiff -u file1 file2
    diff -u file1 file2 | tdiff
    tdiff < patchfile
    svn diff | tdiff
    cvs diff | tdiff
* You can modify the default configuration by putting commands into
  config files /etc/tdiffrc and $HOME/.tdiffrc (or more precisely,
  files specified by the TDIFFRC environment variable -- by default,
  it is equivalent to "/etc/tdiffrc:$ENV{HOME}/.tdiffrc"). Commands
  are just Perl ones. Examples:
    $eight_bit_encoding = 'iso-8859-15';
    $start{'hunk'} = tput 'bold:4';
  As these config files are Perl scripts, they must be owned by root
  when root executes tdiff.
  If your shell is zsh, you can even do:
    svn diff | TDIFFRC=<(echo 'undef $utf8') tdiff
* Under UTF-8 locales, a line is automatically converted into UTF-8
  if it contains an invalid UTF-8 sequence. Because lines are not
  buffered, this is now done on a line by line basis.
EOF
    exit;
  }

# "tdiff --version": print the program name, the version and the
# copyright notice, then exit.
if ($ARGV[0] eq '--version')
  {
    print "$proc $vers\n\n$copyright";
    exit;
  }

# Capabilities used for numeric components of a specification: the first
# number selects the foreground color, the second one the background.
my @color = qw/setaf setab/;

# Cache of the sequences already obtained, indexed by specification.
my %tput;

# Return the terminal sequence associated with a specification, which is
# either the argument string of a single "tput" command (e.g. 'bold' or
# 'setaf 1') or a colon-separated list of components (e.g. 'bold:4'),
# whose sequences are concatenated. A component made of digits only is
# turned into a color setting using @color. Results are cached in %tput,
# so that the external "tput" command is run at most once per
# specification.
sub tput ($)
  {
    my $spec = $_[0];
    return $tput{$spec} if defined $tput{$spec};

    if ($spec !~ /:/)
      {
        # Simple specification: ask the "tput" utility.
        $tput{$spec} = `tput $spec`;
        return $tput{$spec};
      }

    # Composite specification: concatenate the sequences of the components.
    my $seq;
    my $n = 0;
    foreach my $part (split /:/, $spec)
      {
        my $arg = $part =~ /^\d+$/ ? $color[$n++]." $part" : $part;
        $seq .= tput($arg);
      }
    $tput{$spec} = $seq;
    return $tput{$spec};
  }

# These variables are declared with 'our' so that the config files can
# see and modify them.

# Encoding assumed for lines that are not valid UTF-8 (see output).
our $eight_bit_encoding = 'iso-8859-1';

# Sequence printed after each colored segment.
our $end = tput('sgr0');

# Sequence printed before a segment, for each class of text. To use the
# default color: ''. But let's use bright colors by default, except for
# normal text ('sgr0:7' is used in case the default color is already
# bright).
our %start =
  map { ($_->[0], tput($_->[1])) }
  (['normal', 'sgr0:7'],
   ['old',    'bold:1'],
   ['new',    'bold:2'],
   ['same',   'bold:7'],
   ['hunk',   'bold:5'],
   ['info',   'sgr0:6'],
   ['nonl',   'sgr0:5'],
   ['dir',    'bold:4'],
   ['vcsfs',  'bold:3'],
   ['vcsfd',  'bold:6'],
   ['svnps',  'bold:3'],
   ['svnpd',  'bold:6'],
  );

# Assume no UTF-8 by default. The locale is regarded as a UTF-8 one if
# the codeset reported by I18N::Langinfo is 'UTF-8' or, failing that
# (e.g. if this module is not available), if the name of the LC_CTYPE
# locale contains "utf8" or "utf-8". Each test is run in an eval block
# so that a missing module just makes the test fail.
#
# Note: the two tests must be combined with '||' and not with 'or',
# which has a lower precedence than '=': with 'or', the result of the
# second test would be discarded instead of being assigned to $utf8.
our $utf8 =
  eval
  {
    require I18N::Langinfo;
    I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()) eq 'UTF-8';
  }
  ||
  eval
  {
    require POSIX;
    # The category must be the numeric LC_CTYPE constant, not a string.
    POSIX::setlocale(POSIX::LC_CTYPE()) =~ /utf-?8/i;
  };

# Evaluate the config files: those listed in the colon-separated TDIFFRC
# environment variable if it is set and non-empty, otherwise the
# system-wide and the per-user ones.
foreach my $rcfile ($ENV{'TDIFFRC'} ? split /:/, $ENV{'TDIFFRC'} :
                    ("/etc/tdiffrc", "$ENV{HOME}/.tdiffrc"))
  {
    # For security reasons, when tdiff is run as root, the config file
    # must be owned by root to be evaluated.
    next unless $< || -o $rcfile;

    # A config file that does not exist or cannot be read is silently
    # ignored, but an error in the Perl code of a config file (reported
    # in $@) is signaled instead of being silently swallowed. The return
    # value of "do" cannot be used for this test, as a valid config file
    # may end with a statement that yields an undefined value.
    local $@;
    do $rcfile;
    warn "$0: error in config file $rcfile: $@" if $@;
  }

# Input stream. Without arguments, the diff is read from the standard
# input stream.
my $stream = *STDIN;

# Otherwise, the 'diff' command is called with the provided arguments
# and tdiff acts as a wrapper, processing the 'diff' output. The list
# form of the pipe open is used, so that no shell is involved.
if (@ARGV)
  {
    open(DIFF, "-|", 'diff', @ARGV)
      or die "$0: can't exec diff: $!\n";
    $stream = *DIFF;
  }

########################################################################

# Print one line of output made of colored segments, followed by a
# newline character.
# Arguments: a list of (class, text) pairs, where the class is a key of
# %start. With a single argument, it is the class and the text is the
# current line ($_). A trailing newline is removed from each text. Each
# segment is printed between the start sequence of its class and $end.
sub output
  {
    my @segments = @_;
    push @segments, $_ if @segments == 1;

    while (@segments)
      {
        my $class = shift @segments;
        my $text = shift @segments;
        chomp $text;

        # In a UTF-8 environment, automatically convert data into UTF-8
        # when invalid sequences are found, assuming that the data are
        # encoded in $eight_bit_encoding.
        if ($utf8)
          {
            # Decode a copy, as the string may be modified by
            # Encode::decode.
            my $probe = $text;
            eval
              {
                require Encode;
                Encode::decode('utf8', $probe, 1);
              };
            if ($@ =~ /^utf8.*does not map to Unicode/)
              { Encode::from_to($text, $eight_bit_encoding, 'utf8'); }
          }

        print $start{$class} . $text . $end;
      }

    print "\n";
  }

########################################################################

# Pattern matching a line range of a normal-format hunk header: either a
# single line number "N" or "N,M". It captures N and, if present, M.
my $range = qr/(\d+)(?:,(\d+))?/;

# State shared by the main loop and the "other" subroutine:
#   $ctx:   type of the hunk being processed: 'u' (unified format) or
#           'a', 'd', 'c' (normal format: add, delete, change); undefined
#           outside a hunk.
#   $hm:    number of old lines still expected in the current hunk
#           (undefined in an 'a' hunk and in the second part of a 'c'
#           hunk).
#   $hp:    number of new lines still expected in the current hunk
#           (undefined in a 'd' hunk).
#   $hend:  input line number ($.) at which the last hunk ended.
#   $index: input line number of the last "Index: " line.
#   $prop:  input line number of the last "Property changes on: " line;
#           undefined outside the description of property changes.
my ($ctx,$hend,$hm,$hp,$index,$prop);

# Do not buffer output in order to avoid an escape sequence being split
# by buffering if tdiff output is sent to a pipe. This is useful to avoid
# a race condition when another process sends an escape sequence to the
# terminal at the same time, e.g. to change the title of the terminal, as
# escape sequences must not be mixed. An example of issue:
#
#   https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=785497
#
# But even autoflush does not solve this issue with "tdiff | less -R".
$| = 1;

# Main loop: read the diff line by line and output each line with the
# colors corresponding to its role.
while (<$stream>)
  {
    # Outside a hunk, the line is entirely handled by "other", which
    # sets $ctx, $hm and $hp when it recognizes a hunk header.
    unless (defined $ctx)
      {
        other();
        next;
      }

    if ($ctx eq 'u')
      {
        # Unified format: the first character gives the kind of line.
        # A context line counts as both an old line and a new line.
        my $tag = substr $_, 0, 1;
        if ($tag eq ' ')
          {
            output('same');
            $hm--;
            $hp--;
          }
        elsif ($tag eq '-')
          {
            output('old');
            $hm--;
          }
        elsif ($tag eq '+')
          {
            output('new');
            $hp--;
          }
        else
          {
            other();
          }
      }
    else
      {
        # Normal format. In a 'c' hunk, $hm is defined while the old
        # lines are being read and undefined once the "---" separator
        # has been seen.
        my $lead = substr $_, 0, 2;
        my $old_part = defined $hm;
        if ($ctx eq 'c' && $old_part && substr($_, 0, 3) eq '---')
          {
            output('hunk');
            undef $hm;
          }
        elsif ($ctx eq 'a' || ($ctx eq 'c' && !$old_part))
          {
            if ($lead eq '> ')
              {
                output('new');
                $hp--;
              }
            else
              {
                other();
              }
          }
        elsif ($ctx eq 'd' || ($ctx eq 'c' && $old_part))
          {
            if ($lead eq '< ')
              {
                output('old');
                $hm--;
              }
            else
              {
                other();
              }
          }
        else
          {
            die "$0: internal error";
          }
      }

    # Remember the line at which the hunk ends (an undefined counter is
    # regarded as 0) and leave the hunk when no more lines are expected.
    if ($hm == 0 && $hp == 0)
      { $hend = $.; }
    undef $ctx unless $hm > 0 || $hp > 0;
  }

# In wrapper mode, terminate with the exit status of the 'diff' command,
# or with 255 if 'diff' was killed by a signal.
if (@ARGV)
  {
    close DIFF;
    my $status = $?;
    exit(($status & 127) ? 255 : $status >> 8);
  }

# Return the number of lines of a range given by its first line number
# and its last line number. If the last line number is undefined or
# missing (single-line range), return 1.
sub nr
  {
    my ($first, $last) = @_;
    return 1 unless defined $last;
    return $last - $first + 1;
  }

# Handle the current line ($_) when it is not a regular body line of the
# current hunk: hunk headers (unified and normal formats), "\ ..."
# markers (such as "\ No newline at end of file"), Subversion property
# changes, file headers and version-control information. Any other line
# is output as normal text.
# When a hunk header is recognized, the shared state ($ctx, $hm, $hp) is
# set so that the main loop can process the body of the hunk. This
# subroutine takes no arguments and its return value is not meaningful.
sub other
  {
    # Hunk header in unified format ("##" is used instead of "@@" in the
    # description of property changes). Each range has the form "start"
    # or "start,count"; when the count is omitted, it is 1 (it is not
    # the start line number).
    my $hmark = defined $prop ? qr/##/ : qr/@@/;
    if (/^($hmark -\d+(?:,(\d+))? \+\d+(?:,(\d+))? $hmark)(.*)/)
      {
        my ($head,$old,$new,$info) = ($1,$2,$3,$4);
        ($ctx,$hm,$hp) =
          ('u', defined $old ? $old : 1, defined $new ? $new : 1);
        output('hunk', $head, 'info', $info);
        return;
      }

    # Hunk headers in normal format: add, delete, change.
    if (/^\d+a${range}$/)
      {
        ($ctx,$hm,$hp) = ('a', undef, nr($1,$2));
        output('hunk');
        return;
      }
    if (/^${range}d\d+$/)
      {
        ($ctx,$hm,$hp) = ('d', nr($1,$2), undef);
        output('hunk');
        return;
      }
    if (/^${range}c${range}$/)
      {
        ($ctx,$hm,$hp) = ('c', nr($1,$2), nr($3,$4));
        output('hunk');
        return;
      }

    # "\ ..." marker, either inside a hunk ($ctx is still defined, e.g.
    # when only the old file lacks a final newline) or just after the
    # last line of a hunk.
    if (substr($_, 0, 2) eq '\\ '
        && (defined $ctx || (defined $hend && $hend + 1 == $.)))
      {
        output('nonl');
        return;
      }

    # Subversion property changes.
    if (/^(Property changes on: )(.*)/)
      {
        output('svnps', $1, 'svnpd', $2);
        $prop = $.;
        return;
      }
    if (defined $prop)
      {
        # Line of underscores just after "Property changes on: ...".
        if (/^_{5,}$/ && $prop + 1 == $.)
          {
            output('svnps');
            return;
          }
        if (/^(Added: |Deleted: )(.*)/)
          {
            output('svnps', $1, 'svnpd', $2);
            return;
          }
        # Any other line ends the description of property changes.
        undef $prop;
      }

    # File headers.
    my $s4 = substr $_, 0, 4;
    if ($s4 eq '--- ')
      {
        output('old');
        return;
      }
    if ($s4 eq '+++ ')
      {
        output('new');
        return;
      }
    if (/^(diff |Only in )/)
      {
        output('dir');
        return;
      }

    # Version-control information. A line of "=" characters is
    # recognized only just after an "Index: " line; in this case, the
    # second segment is empty.
    if (/^(Index: )(.*)/)
      {
        output('vcsfs', $1, 'vcsfd', $2);
        $index = $.;
        return;
      }
    if (/^(RCS file: |retrieving )(.*)/
        || (/^(={5,})$/ && defined $index && $index + 1 == $.))
      {
        output('vcsfs', $1, 'vcsfd', $2);
        return;
      }

    output('normal');
  }