#!/usr/bin/perl -w
# grep in C/C++ source/comments/strings/preprocessor
#
# usage: cgrep [-s] [-d] [-c] [-p] [-n] \
#              [-i] [-D define] [-U undefine] \
#              regexp [file...]
#
# -c (to search comments) may be split into separate C and C++ comment
#    subcategories (-C/--c-comment and --cplusplus-comment);
# -s and -d (to search single- and double-quoted strings) have no
#    combined equivalent; do they need one?
# -p searches in preprocessor lines
# -n searches in everything else
#
# -i ignores case
# -D defines a symbol for #ifdef
# -U undefines a symbol for #ifdef
#
# unmentioned symbols count as both defined and undefined, so both the
# #if and the #else should be searched (though note that this can
# currently fail to do the right thing if a #elif defined() refers
# to a known symbol, but other #else sections don't).
#
# If no file is given, or a file is named "-", standard input is read.
#
# TBD:
# * add --help (probably using perldoc)
# * support preprocessor properly, rather than using heuristics to
#   guess at the meaning of #ifdef etc.
# * add in some more options used by a real grep; ought to be done
#   soon, as it may require renaming these options
# * optionally print e.g. just the non-comment data on the line
# * allow grepping across two categories of data with a single regexp
# * support grepping in #included files automatically (or optionally
#   just #include "..." and not #include <...>)
# * support -Dname=value as well as just -Dname

use strict;
use warnings;
use Getopt::Long;

# Scanner states: the kind of text the scanner is currently inside.
my $NORM = 0;    # normal text
my $Q    = 1;    # single-quoted text
my $QQ   = 2;    # double-quoted text
my $C    = 3;    # C comment
my $CC   = 4;    # C++ comment

# Which categories of text to search (set from the command line).
my %search = (q => 0, qq => 0, c => 0, cc => 0, norm => 0, p => 0);

# Symbols given with -D (value 1) or -U (value 0); others are unknown.
my %defined;
my $ignore = 0;

my $usage = "usage: cgrep [-s] [-d] [-c] [-p] [-n] [-i]"
          . " [-D define] [-U undefine] regexp [file...]\n";

Getopt::Long::Configure("bundling");
my $options_ok = GetOptions(
    "s|single-quoted" => \$search{q},
    "d|double-quoted" => \$search{qq},
    "c|comments"      => sub { $search{c} = $search{cc} = $_[1] },
    "C|c-comment"     => \$search{c},
    # C++ comments live in the "cc" category, not "c".
    "cplusplus-comment|cc-comment|cpp-comment" => \$search{cc},
    "p|preprocessor"  => \$search{p},
    "n|normal"        => \$search{norm},
    "i|ignore-case"   => \$ignore,
    "D|define=s"      => sub { $defined{ $_[1] } = 1 },
    "U|undefine=s"    => sub { $defined{ $_[1] } = 0 },
);

my $pattern = shift @ARGV;
unless ($options_ok && defined $pattern) {
    print STDERR $usage;
    exit 2;
}

# Compile once.  A compiled regexp also avoids Perl's special case where an
# empty match pattern silently reuses the last successful pattern.
my $re = $ignore ? qr/$pattern/i : qr/$pattern/;

@ARGV = ("-") unless @ARGV;

# update_nest($directive, $nest) -> new $nest
#
# $directive is the text of a preprocessor line after the leading "#".
# $nest is the conditional stack, innermost level first, one character per
# level: 0 = branch not taken, 1 = branch taken, x = a branch of this
# conditional was already taken, a = unknown, search every branch.
# Both "#elif" and the historical spelling "#elseif" are recognised.
# Directives that are not conditionals leave $nest unchanged.
sub update_nest {
    my ($pre, $nest) = @_;

    if ($pre =~ /^if(?:def\s+|\s+defined[\s\(]+)([^\s\)]+)/) {            #ifdef $1
        my $sym = $1;
        $nest = (defined($defined{$sym}) ? $defined{$sym} : "a") . $nest;
    }
    elsif ($pre =~ /^if(?:ndef\s+|\s*!\s*defined[\s\(]+)([^\s\)]+)/) {    #ifndef $1
        my $sym = $1;
        $nest = (defined($defined{$sym}) ? 1 - $defined{$sym} : "a") . $nest;
    }
    elsif ($pre =~ /^if\s+0\b/) {                                         #if 0
        $nest = "0$nest";
    }
    elsif ($pre =~ /^if\s+1\b/) {                                         #if 1
        $nest = "1$nest";
    }
    elsif ($pre =~ /^if/) {                                               #if
        $nest = "a$nest";
    }
    elsif ($pre =~ /^el(?:se)?if\s+defined[\s\(]+([^\s\)]+)/) {           #elif defined $1
        # Save the symbol first: the successful s/// below resets $1.
        my $sym = $1;
        if ($nest =~ s/^0//) {
            $nest = (defined($defined{$sym}) ? $defined{$sym} : "a") . $nest;
        }
        else {
            $nest =~ s/^1/x/;
        }
    }
    elsif ($pre =~ /^el(?:se)?if\s*!\s*defined[\s\(]+([^\s\)]+)/) {       #elif !defined $1
        my $sym = $1;
        if ($nest =~ s/^0//) {
            $nest = (defined($defined{$sym}) ? 1 - $defined{$sym} : "a") . $nest;
        }
        else {
            $nest =~ s/^1/x/;
        }
    }
    elsif ($pre =~ /^el(?:se)?if\s+0\b/) {                                #elif 0
        $nest =~ s/^1/x/;
    }
    elsif ($pre =~ /^el(?:se)?if\s+1\b/) {                                #elif 1
        $nest =~ s/^0/1/ or $nest =~ s/^1/x/;
    }
    elsif ($pre =~ /^el(?:se)?if/) {                                      #elif <unknown>
        # Nothing taken yet: this branch may be taken.  A branch already
        # taken: everything after it is dead.
        $nest =~ s/^0/a/ or $nest =~ s/^1/x/;
    }
    elsif ($pre =~ /^else\b/) {                                           #else
        # "a" levels stay "a" so that both branches of a conditional on an
        # unknown symbol are searched.
        $nest =~ s/^0/1/ or $nest =~ s/^1/x/;
    }
    elsif ($pre =~ /^endif\b/) {                                          #endif
        # Never pop the outermost level; an unbalanced #endif would
        # otherwise make the rest of the file unsearchable.
        $nest =~ s/^.// if length($nest) > 1;
    }
    return $nest;
}

# scan_line($line, $state) -> (\%text, $state, $cont)
#
# Splits one input line (without its line terminator) into the text found
# in each category, starting in scanner state $state.  Returns a hash ref
# with keys norm, q, qq, c and cc, the state at the end of the line, and
# $cont, which is true when the line ended with a backslash.
# When a category is interrupted by text of another category, the pieces
# are joined with a single space.
sub scan_line {
    my ($rest, $state) = @_;
    my $cont = 0;
    my %text = map { $_ => "" } qw(norm q qq c cc);
    my %sep  = map { $_ => "" } qw(norm q qq c);

    my $append = sub {
        my ($kind, $chunk) = @_;
        $text{$kind} .= $sep{$kind} . $chunk;
        # The separator is used once; later adjacent tokens are not split.
        $sep{$kind} = "";
        for my $other (grep { $_ ne $kind } keys %sep) {
            $sep{$other} = " " unless $text{$other} eq "";
        }
    };

    while ($rest ne "") {
        if ($state == $NORM) {
            if    ($rest =~ s/^\'//)  { $state = $Q }
            elsif ($rest =~ s/^\"//)  { $state = $QQ }
            elsif ($rest =~ s!^/\*!!) { $state = $C }
            elsif ($rest =~ s!^//!!)  { $state = $CC }
            elsif ($rest =~ s/^\\$//) { $cont = 1 }
            elsif ($rest =~ s!^([^\'\"/\\]+|\\.|/)!!) { $append->("norm", $1) }
            else { die "Can't handle NORM \"$rest\"" }
        }
        elsif ($state == $Q) {
            if    ($rest =~ s/^\'//)  { $state = $NORM }
            elsif ($rest =~ s/^\\$//) { $cont = 1 }
            elsif ($rest =~ s!^([^\'\\]+|\\.)!!) { $append->("q", $1) }
            else { die "Can't handle Q \"$rest\"" }
        }
        elsif ($state == $QQ) {
            if    ($rest =~ s/^\"//)  { $state = $NORM }
            elsif ($rest =~ s/^\\$//) { $cont = 1 }
            elsif ($rest =~ s!^([^\"\\]+|\\.)!!) { $append->("qq", $1) }
            else { die "Can't handle QQ \"$rest\"" }
        }
        elsif ($state == $C) {
            if    ($rest =~ s!^\*/!!) { $state = $NORM }
            elsif ($rest =~ s/^\\$//) { $cont = 1 }
            elsif ($rest =~ s/^([^\*]+|\*)//) { $append->("c", $1) }
            else { die "Can't handle C \"$rest\"" }
        }
        elsif ($state == $CC) {
            # A C++ comment runs to the end of the line; a trailing
            # backslash continues it onto the next line.
            $cont = 1 if $rest =~ s/\\$//;
            $text{cc} .= $rest;
            $rest = "";
        }
    }
    return (\%text, $state, $cont);
}

foreach my $file (@ARGV) {
    my $fh;
    if ($file eq "-") {
        $fh = \*STDIN;
    }
    elsif (!open($fh, "<", $file)) {
        print STDERR "Failed to open $file: $!\n";
        next;
    }

    # Per-file state, so an unterminated comment, string or #if in one
    # file cannot affect the next one.
    my $nest  = "a";    # 0=no,1=yes,x=done,a=all; push onto start of string
    my $state = $NORM;
    my $cont  = 0;

    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\r$//;

        # A C++ comment ends at end of line unless it was continued.
        $state = $NORM if $state == $CC && !$cont;

        my $preprocessor = 0;
        if ($state == $NORM && $line =~ /^\s*\#\s*(.*)$/) {
            $preprocessor = 1;
            $nest = update_nest($1, $nest);
        }
        my $valid = $nest =~ /^[a1]+$/;

        my $text;
        ($text, $state, $cont) = scan_line($line, $state);

        next unless $valid;
        if (   $search{ $preprocessor ? "p" : "norm" } && $text->{norm} =~ $re
            || $search{c}  && $text->{c}  =~ $re
            || $search{cc} && $text->{cc} =~ $re
            || $search{q}  && $text->{q}  =~ $re
            || $search{qq} && $text->{qq} =~ $re)
        {
            print "$file:" if @ARGV > 1;
            print "$line\n";
        }
    }
    close $fh unless $file eq "-";
}