#!/usr/bin/perl -w

# grep in C/C++ source/comments/strings/preprocessor
#
# usage: cgrep [-s] [-d] [-c] [-p] [-n] \
#              [-i] [-D define] [-U undefine] \
#              regexp [file...]
#
# -c (to search comments) may be split into separate C and C++ comment
#    subcategories;
# -s and -d (to search single- and double-quoted strings) have no
#    combined equivalent; do they need one?
# -p searches in preprocessor lines
# -n searches in everything else
#
# -i ignores case
# -D defines a symbol for #ifdef
# -U undefines a symbol for #ifdef
#
# Unmentioned symbols count as both defined and undefined, so both the
# #if and the #else sections should be searched.  (Note that this can
# currently fail to do the right thing if a #elseif defined() refers
# to a known symbol but the other #else sections don't.)
#
# TBD:
# * add --help (probably using perldoc)
# * support preprocessor properly, rather than using heuristics to
#   guess at the meaning of #ifdef etc.
# * add in some more options used by a real grep; ought to be done
#   soon, as it may require renaming these options
# * optionally print e.g. just the non-comment data on the line
# * allow grepping across two categories of data with a single regexp
# * support grepping in #included files automatically (or optionally
#   just #include "..." and not #include <...>)
# * support -Dname=value as well as just -Dname

use Getopt::Long;
# Command-line option handling.
#
# %search records which categories of text the regexp is applied to
# (each flag is 0 until the matching option is given):
#   q    - single-quoted (character) literals      (-s)
#   qq   - double-quoted string literals           (-d)
#   c    - C comments,   /* ... */                 (-C, or -c for both)
#   cc   - C++ comments, // ...                    (--cplusplus-comment,
#                                                   or -c for both)
#   norm - text outside strings, comments and preprocessor lines (-n)
#   p    - preprocessor lines                      (-p)
#
# %defined maps every symbol named with -D to 1 and every symbol named
# with -U to 0; symbols that were not mentioned have no entry at all.
%search=("q"=>0,"qq"=>0,"c"=>0,"cc"=>0,"norm"=>0,"p"=>0);
%defined=();
$ignore=0;
Getopt::Long::Configure("bundling");
unless(GetOptions("s|single-quoted"=>\$search{"q"},
                  "d|double-quoted"=>\$search{"qq"},
                  "c|comments"     =>sub{$search{"c"}=$search{"cc"}=$_[1]},
                  "C|c-comment"    =>\$search{"c"},
                  # C++ comments have their own category ("cc"); binding
                  # this option to "c" made it a duplicate of -C.
                  "cplusplus-comment|cc-comment|cpp-comment"=>\$search{"cc"},
                  "p|preprocessor" =>\$search{"p"},
                  "n|normal"       =>\$search{"norm"},
                  "i|ignore-case"  =>\$ignore,
                  "D|define=s"     =>sub{$defined{$_[1]}=1},
                  "U|undefine=s"   =>sub{$defined{$_[1]}=0})){
    # GetOptions has already reported the offending option on STDERR;
    # stop here rather than searching with a half-parsed command line.
    print STDERR "usage: $0 [-s] [-d] [-c] [-C] [--cplusplus-comment] [-p] [-n] [-i] [-D define] [-U undefine] regexp [file...]\n";
    exit 2;
}
# Lexer states: the kind of text the scanner is currently inside.
$NORM=0; #normal text
$Q=1;    #single-quoted text
$QQ=2;   #double-quoted text
$C=3;    #C comment
$CC=4;   #C++ comment

$state=$NORM; #current lexer state; carried over from line to line
$cont=0;      #true when the previous line ended in a backslash

# The first argument left after option parsing is the regexp.  Without
# one there is nothing to search for, so say so instead of carrying on
# with an undefined pattern.
unless(@ARGV){
    print STDERR "usage: $0 [options] regexp [file...]\n";
    exit 2;
}
$pattern=shift @ARGV;

# Compile the regexp once, up front, so that a malformed pattern is
# reported immediately rather than at the first match attempt.  The /i
# flag is the compiled equivalent of prefixing the pattern with "(?i)".
$compiled=eval{$ignore?qr/$pattern/i:qr/$pattern/};
unless(defined $compiled){
    print STDERR "$0: invalid regexp: $@";
    exit 2;
}
$pattern=$compiled;

# With no file arguments, read standard input ("-").
@ARGV="-" unless @ARGV;

# Main loop: read every input file a line at a time, split each line into
# normal text, single-quoted text, double-quoted text, C comment text and
# C++ comment text, and print the original line when the regexp matches
# one of the categories selected in %search.  Lines inside conditional
# sections that are known to be inactive (from -D/-U, "#if 0", "#if 1")
# are skipped.
foreach $file (@ARGV){
    # Start every file with a clean lexer so that an unterminated comment
    # or string at the end of one file cannot leak into the next one.
    $state=$NORM;
    $cont=0;

    # $nest is a stack of conditional-compilation states, innermost
    # first, one character per open #if:
    #   0 = this branch is not taken (a later #elif/#else still may be)
    #   1 = this branch is taken
    #   x = an earlier branch was taken, so the rest are skipped
    #   a = condition unknown: search every branch
    # The final "a" stands for the file level itself.
    $nest="a";

    # "-" means standard input.  Everything else is opened with
    # three-argument open so that characters such as ">" or "|" in a
    # file name are never interpreted as an open mode.
    my $fh;
    if($file eq "-"){
        $fh=\*STDIN;
    }elsif(!open($fh,"<",$file)){
        print STDERR "Failed to open $file: $!\n";
        next;
    }
    while(<$fh>){
        chomp;s/\r$//;
        $org=$_; #original line for output

        # A C++ comment ends at the end of its line unless that line
        # ended in a backslash continuation.
        $state=$NORM if ($state==$CC && !$cont);

        # Preprocessor directives are only recognised outside strings
        # and comments.  The directive text after "#" drives $nest.
        $preprocessor=0;
        if($state==$NORM&&/^\s*\#\s*(.*)$/){
            $preprocessor=1;
            $pre=$1;
            if($pre=~/^if(?:def\s+|\s+defined[\s\(]+)([^\s\)]+)/){
                #ifdef $1
                if(defined($defined{$1})){
                    $nest=$defined{$1}.$nest;
                }else{
                    $nest="a$nest";
                }
            }elsif($pre=~/^if(?:ndef\s+|\s*!\s*defined[\s\(]+)([^\s\)]+)/){
                #ifndef $1
                if(defined($defined{$1})){
                    $nest=(1-$defined{$1}).$nest;
                }else{
                    $nest="a$nest";
                }
            }elsif($pre=~/^if\s+0\b/){
                #if 0
                $nest="0$nest";
            }elsif($pre=~/^if\s+1\b/){
                #if 1
                $nest="1$nest";
            }elsif($pre=~/^if/){
                #if
                $nest="a$nest";
            }elsif($pre=~/^el(?:se)?if\s+defined[\s\(]+([^\s\)]+)/){
                #elif defined $1 (the spelling "elseif" is accepted too)
                if($nest=~s/^0//){
                    # No branch taken so far: this one decides.
                    if(defined($defined{$1})){
                        $nest=$defined{$1}.$nest;
                    }else{
                        $nest="a$nest";
                    }
                }else{
                    # A branch was already taken: skip the rest.
                    $nest=~s/^1/x/;
                }
            }elsif($pre=~/^el(?:se)?if\s*!\s*defined[\s\(]+([^\s\)]+)/){
                #elif !defined $1
                if($nest=~s/^0//){
                    if(defined($defined{$1})){
                        $nest=(1-$defined{$1}).$nest;
                    }else{
                        $nest="a$nest";
                    }
                }else{
                    $nest=~s/^1/x/;
                }
            }elsif($pre=~/^el(?:se)?if\s+0\b/){
                #elif 0
                $nest=~s/^1/x/;
            }elsif($pre=~/^el(?:se)?if\s+1\b/){
                #elif 1
                $nest=~s/^0/1/ or $nest=~s/^1/x/;
            }elsif($pre=~/^el(?:se)?if/){
                #elif with a condition we cannot evaluate: unknown if no
                #branch has been taken yet, skipped if one already was
                $nest=~s/^0/a/ or $nest=~s/^1/x/;
            }elsif($pre=~/^else\b/){
                #else: taken if nothing was taken before, skipped if a
                #branch was taken; an unknown ("a") section stays unknown
                #so that both sides of the conditional are searched
                $nest=~s/^0/1/ or $nest=~s/^1/x/;
            }elsif($pre=~/^endif\b/){
                #endif: pop the innermost level, but never the file level
                #(an unbalanced #endif must not disable the rest of the file)
                $nest=~s/^.// if length($nest)>1;
            }
        }

        # The line is searched only if every enclosing conditional is
        # either taken or unknown.
        $valid=($nest=~/^[a1]+$/);
        #print "valid=$valid nest=$nest\t$_\n";

        # Per-line accumulators, one per category.  Each *sep variable
        # holds a single space when text of another category has been
        # seen since the last chunk of this category, so that two pieces
        # separated by e.g. a comment are not matched as if adjacent.
        $cont=0;
        $normtext="";
        $qtext="";
        $qqtext="";
        $ctext="";
        $cctext="";
        $normsep="";
        $qsep="";
        $qqsep="";
        $csep="";
        while($_ ne ""){
            if($state==$NORM){
                if(s/^\'//){
                    $state=$Q;
                }elsif(s/^\"//){
                    $state=$QQ;
                }elsif(s!^/\*!!){
                    $state=$C;
                }elsif(s!^//!!){
                    $state=$CC;
                }elsif(s/^\\$//){
                    $cont=1;
                }elsif(s!^([^\'\"/\\]+|\\.|/)!!){
                    $normtext.=$normsep.$1;
                    # The separator has been used; directly following
                    # chunks of the same category are contiguous.
                    $normsep="";
                    $qsep=" " unless $qtext eq "";
                    $qqsep=" " unless $qqtext eq "";
                    $csep=" " unless $ctext eq "";
                }else{
                    die "Can't handle NORM \"$_\"";
                }
            }elsif($state==$Q){
                if(s/^\'//){
                    $state=$NORM;
                }elsif(s/^\\$//){
                    $cont=1;
                }elsif(s!^([^\'\\]+|\\.)!!){
                    $qtext.=$qsep.$1;
                    $qsep="";
                    $normsep=" " unless $normtext eq "";
                    $qqsep=" " unless $qqtext eq "";
                    $csep=" " unless $ctext eq "";
                }else{
                    die "Can't handle Q \"$_\"";
                }
            }elsif($state==$QQ){
                if(s/^\"//){
                    $state=$NORM;
                }elsif(s/^\\$//){
                    $cont=1;
                }elsif(s!^([^\"\\]+|\\.)!!){
                    $qqtext.=$qqsep.$1;
                    $qqsep="";
                    $normsep=" " unless $normtext eq "";
                    $qsep=" " unless $qtext eq "";
                    $csep=" " unless $ctext eq "";
                }else{
                    die "Can't handle QQ \"$_\"";
                }
            }elsif($state==$C){
                if(s!^\*/!!){
                    $state=$NORM;
                }elsif(s/^\\$//){
                    $cont=1;
                }elsif(s/^([^\*]+|\*)//){
                    $ctext.=$csep.$1;
                    $csep="";
                    $normsep=" " unless $normtext eq "";
                    $qsep=" " unless $qtext eq "";
                    $qqsep=" " unless $qqtext eq "";
                }else{
                    die "Can't handle C \"$_\"";
                }
            }elsif($state==$CC){
                # A C++ comment swallows the rest of the line; a trailing
                # backslash continues it onto the next line.
                if(s/\\$//){
                    $cont=1;
                }
                $cctext.=$_;
                $_="";
            }
        }
        if($valid){
            # Text outside strings and comments counts as preprocessor
            # text on a directive line and as normal text elsewhere.
            if(($search{$preprocessor?"p":"norm"}&&$normtext=~/$pattern/o) ||
               ($search{"c"}   &&$ctext   =~/$pattern/o) ||
               ($search{"cc"}  &&$cctext  =~/$pattern/o) ||
               ($search{"q"}   &&$qtext   =~/$pattern/o) ||
               ($search{"qq"}  &&$qqtext  =~/$pattern/o)){
                # Prefix the file name only when several files are searched.
                if(@ARGV>1){
                    print "$file:";
                }
                print "$org\n";
            }
        }
    }
    # Leave STDIN open; close only handles this loop opened itself.
    close $fh unless $file eq "-";
}