package Inquisitor; # be careful around importing anything since we don't want to pollute the users namespace use strict; use attributes; no warnings; my @preloaded; # Check what's loaded before we pollute the namespace my @checkPreloaded = qw(List::Util File::Spec Sub::Util Cwd Scalar::Util Class::Inspector Encode Encode::Config Encode::Alias Encode::Encoding Encode::utf8 Encode::UTF_EBCDIC Encode::XS Encode::MIME bytes Storable File::Basename parent Encode::MIME::Name); # Mark warnings to detect difference between warnings and errors. Inspired by https://github.com/skaji/syntax-check-perl $SIG{__WARN__} = sub { warn '=PerlWarning=', @_ }; # These modules can cause issues because they wipe the symbol table before we get a chance to inspect it. # Prevent them from loading. # I hope this doesn't cause any issues, perhaps VERSION numbers or import statements would help here # # See https://perldoc.perl.org/perldelta#Calling-the-import-method-of-an-unknown-package-produces-a-warning # for a discussion of why the stub imports are necessary as of Perl 5.40 $INC{'namespace/clean.pm'} = ''; $INC{'namespace/autoclean.pm'} = ''; { no strict 'refs'; *{'namespace::autoclean::VERSION'} = sub { '0.29' }; *{'namespace::clean::VERSION'} = sub { '0.27' }; *{'namespace::autoclean::import'} = sub { }; *{'namespace::clean::import'} = sub { }; } CHECK { if(!$ENV{'PERLNAVIGATORTEST'}){ run(); } } sub run { print "Running inquisitor\n"; my $sourceFilePath = shift; eval { load_test_file($sourceFilePath); populate_preloaded(); load_dependencies(); dump_loaded_mods(); dump_vars_to_main("main"); # This following one has the largest impact on memory and finds less interesting stuff. Low limits though, which probably helps my $allPackages = get_all_packages(); $allPackages = filter_modpacks($allPackages); dump_subs_from_packages($allPackages); my $packages = run_pltags($sourceFilePath); print "Done with pltags. Now dumping same-file packages\n"; foreach my $package (@$packages){ # This is finding packages in the file we're inspecting, and then dumping them into a single namespace in the file if ($package) { dump_vars_to_main($package); dump_inherited_to_main($package); tag_parents($package); } } 1; # For the eval } or do { my $error = $@ || 'Unknown failure'; print "PN:inquistor failed with error: $error\n"; }; } sub load_dependencies { require File::Basename; require File::Spec; require B; require Encode; my $module_dir = File::Spec->catfile( File::Basename::dirname(__FILE__), 'lib_bs22'); unshift @INC, $module_dir; # Sub::Util was added to core in 5.22. The real version can find module names of C code (e.g. List::Util). The fallback can still trace Pure Perl functions require SubUtilPP; require Inspectorito; require Devel::Symdump; } sub load_test_file { # If we're in test mode for a .t file, we haven't loaded the file yet, so let's eval it to populate the symbol table my $filePath = shift; return if !$filePath; my ($source, $offset, $file) = load_source($filePath); $source = "local \$0; BEGIN { \$0 = '${filePath}'; if (\$INC{'FindBin.pm'}) { FindBin->again(); }; }\n# line 0 \"${filePath}\"\nCORE::die('END_EARLY');\n$source"; eval $source; ## no critic if ($@ eq "END_EARLY.\n"){ return; } else { die("Rethrowing error from $filePath: ---$@---"); } } sub maybe_print_sub_info { my ($sFullPath, $sDisplayName, $codeRef, $sSkipPackage, $subType) = @_; $subType = 't' if !$subType; my $UNKNOWN = ""; if (defined &$sFullPath or $codeRef) { $codeRef ||= \&$sFullPath; my $meta = B::svref_2object($codeRef); $meta->isa('B::CV') or return 0; my ($file, $line, $subType) = resolve_file($meta, $subType, $codeRef); my $pack = $UNKNOWN; my $subname = $UNKNOWN; $subname = SubUtilPP::subname($codeRef); $pack = $1 if($subname =~ m/^(.+)::.*?$/); # Subname is a fully qualified name. If it's the normal name, just ignore it. $subname = '' if (($pack and $sSkipPackage and $pack eq $sSkipPackage) or ($pack eq 'main')); return 0 if $file =~ /([\0-\x1F])/ or $pack =~ /([\0-\x1F])/; return 0 if $file =~ /(Moo.pm|Exporter.pm)$/; # Objects pollute the namespace, many things have exporter if (($file and $file ne $0) or ($pack and $pack ne $sSkipPackage)) { # pltags will find everything in $0 / currentpackage, so only include new information. # Note: the above IF is breaking $self-> for Object::Pad for things that exist in current package. # Basically `field $writerField: writer;` is generating `set_writerField` which is then skipped. print_tag($sDisplayName || $sFullPath, $subType, $subname, $file, $pack, $line, '') ; return 1; } } return 0; } sub tag_parents { my $package = shift; no strict 'refs'; my @parents = @{"${package}::ISA"}; my $primaryGuardian = $parents[0]; if($primaryGuardian){ print_tag("$package", '2', $primaryGuardian, '', '', '', ''); } } sub resolve_file { my ($meta, $subType, $codeRef) = @_; my $file = ''; my $line = ''; # Very few things are tagged method, but we can clean up autocomplete if it is. Can something be both an attribute and a attribute? Also, i and t both become x? $subType = 'x' if (grep /^method$/, attributes::get($codeRef)); if ($meta->START->isa('B::COP')){ $file = $meta->START->file; $line = $meta->START->line - 1; } elsif ($meta->GV->isa('B::GV') and $meta->GV->FILE =~ /Class[\\\/](?:XS)?Accessor\.pm$/){ # If something comes from XSAccessor or Accessor, it's an attribute (e.g. Moo, ClassAccessor), but we don't know where in the Moo class it's defined. $subType = 'd'; } elsif(UNIVERSAL::can($meta, 'FILE')){ # Happens for Corinna methods my $testFile = $meta->FILE; if( $testFile !~ /Inquisitor\.pm/ ){ # Are constants and other preprocessing things showing up in the inquistor??? $file = $testFile; } } # Moose (but not Moo) attributes return this for a file. if ($file =~ /^accessor [\w:]+ \(defined at (.+) line (\d+)\)$/){ $file = $1; $line = $2; $subType = 'd'; } return ($file, $line, $subType); } sub print_tag { # Dump details to STDOUT. Format depends on type my ($symbol, $type, $typeDetails, $file, $pack, $line, $value) = @_; #TODO: strip tabs and newlines from all of these? especially value return if $value =~ /[\0-\x1F]/; $file = '' if $file =~ /^\(eval/; $line = 0 if ($line ne '' and $line < 0); print "$symbol\t$type\t$typeDetails\t$file\t$pack\t$line\t$value\n"; } sub run_pltags { require pltags; my $sourceFilePath = shift; my ($source, $offset, $file) = load_source($sourceFilePath); print "\n--------------Now Building the new pltags ---------------------\n"; my $plTagger = pltags->new(); my ($tags, $packages) = $plTagger->build_pltags($source, $offset, $file); # $0 should be the script getting compiled, not this module foreach my $newTag (@$tags){ print $newTag . "\n"; } return $packages } sub dump_vars_to_main { my ($package) = @_; no strict 'refs'; ## no critic my $fullPackage = "${package}::"; foreach my $thing (sort keys %$fullPackage) { next if $thing =~ /^_local_methods( $package ); foreach my $name (@$methods){ next if $name =~ /^(F_|O_|L_)/; # The unhelpful C compiled things if (my $codeRef = UNIVERSAL::can($package, $name) ) { my $iRes = maybe_print_sub_info("${package}::${name}", $name, $codeRef, $package, 'i'); } } } sub populate_preloaded { # Populate preloaded modules before we pollute the symbol table. foreach my $mod (@checkPreloaded){ # Ideally we'd use Module::Loaded, but it only became core in Perl 5.9 my $file = $mod . ".pm"; $file =~ s/::/\//g; push (@preloaded, $mod) if $INC{$file}; } } sub dump_subs_from_packages { my ($modpacks, $seen, $allowance) = @_; my $totalCount = 0; my %baseCount; my $baseRegex = qr/^(\w+)/; # Just in case we find too much stuff. Arbitrary limit of 100 subs per module, 200 fully loaded packages. # results in 10 fully loaded files in the server before we start dropping them on the ground because of the lru-cache # Test with these limits and then bump them up if things are working well my $modLimit = 150; my $nameSpaceLimit = 10000; # Applied to Foo in Foo::Bar my $totalLimit = 30000; INSPECTOR: foreach my $mod (@$modpacks){ my $pkgCount = 0; next INSPECTOR if($mod =~ $baseRegex and $baseCount{$1} > $nameSpaceLimit); my $methods = Inspectorito->local_methods( $mod ); next INSPECTOR if !defined($methods); #my $methods = ClassInspector->functions( $mod ); # Less memory, but less accurate? # Sort because we have a memory limit and want to cut the less important things. @$methods = sort { ($a =~ /^[A-Z][A-Z_]+$/) cmp ($b =~ /[A-Z][A-Z_]+$/) # Anything all UPPERCASE is at the end || ($a =~ /^_/) cmp ($b =~ /^_/) # Private methods are 2nd to last || $a cmp $b } @$methods; # Normal stuff up front. Order doesn't really matter, but sort anyway for readability foreach my $name (@$methods){ next if $name =~ /^(F_|O_|L_)/; # The unhelpful C compiled things if (my $codeRef = UNIVERSAL::can($mod, $name) ) { # TODO: Differentiate functions vs methods. Methods come from here, but so do functions. Perl mixes the two definitions anyway. my $iRes = maybe_print_sub_info("${mod}::${name}", '', $codeRef); $pkgCount += $iRes; $totalCount += $iRes; } last INSPECTOR if $totalCount > $totalLimit; next INSPECTOR if $pkgCount > $modLimit; } $baseCount{$1} += $pkgCount if ($mod =~ $baseRegex); } return; } sub filter_modpacks { my ($modpacks) = @_; # Some of these things I've imported in here, some are just piles of C code. # We'll still nav to modules and find anything explictly imported so we can be aggressive at removing these. my @to_remove = ("if", "Cwd", "B", "main","version","POSIX","Fcntl","Errno","Socket", "DynaLoader","CORE","utf8","UNIVERSAL","PerlIO","re","Internals","strict","mro","Regexp", "Exporter","Inquisitor", "XSLoader","attributes", "warnings","strict","utf8", "constant","XSLoader", "Carp", "Inspectorito", "SubUtilPP", "base", "Config", "overloading", "Devel::Symdump", "vars", "Tie::Hash::NamedCapture", "Text::Balanced", "Filter::Util::Call", "IO::Poll", "IO::Seekable", "IO::Handle", "IO::File", "Symbol", "IO", "SelectSaver", "overload", "Filter::Simple", "SelfLoader", "PerlIO::Layer", "Text::Balanced::Extractor", "IO::Socket", @checkPreloaded); my %filter = map { $_ => 1 } @to_remove; # Exporter:: should remove Heavy and Tiny, Moose::Meta is removed just because it drops more than 1500 things and I don't care about any of them my $filter_regex = qr/^(File::Spec::|warnings::register|lib_bs22::|Exporter::|Moose::Meta::|Class::MOP::|B::|Config::)/; # TODO: Allow keeping some of these my $private = qr/::_\w+/; foreach (@preloaded) { $filter{$_} = 0 }; my @filtered = grep { !$filter{$_} and $_ !~ $filter_regex and $_ !~ $private} @$modpacks; # Sort by depth and then alphabetically. The assumption is that more nested things are less important. # Also, consistent sorting makes debugging easier. @filtered = sort { my $depth_a = $a =~ tr/:://; my $depth_b = $b =~ tr/:://; # First, sort by depth if ($depth_a != $depth_b) { return $depth_a <=> $depth_b; } # Second, sort alphabetically return $a cmp $b; } @filtered; return \@filtered; } sub dump_loaded_mods { my @modules = my $displays = {}; foreach my $module (keys %INC) { my $display_mod = $module; $display_mod =~ s/[\/\\]/::/g; $display_mod =~ s/(?:\.pm|\.pl)$//g; next if $display_mod =~ /lib_bs22::|^(Inquisitor|B)$/; next if !Inspectorito->loaded($display_mod); $displays->{$display_mod} = $INC{$module}; } my $filtered_modules = filter_modpacks([keys %$displays]); foreach my $key_to_print (@$filtered_modules) { my $path = $displays->{$key_to_print}; next if !$path; # If we don't have a path, the modHunter module would be better print_tag("$key_to_print", "m", "", $path, $key_to_print, 0, ""); } return; } sub get_all_packages { my $obj = Devel::Symdump->rnew(); my @allPackages = $obj->packages; return \@allPackages; } sub load_source { my $sourceFilePath = shift; # Only set during testing. my ($source, $offset, $file); if($sourceFilePath){ # Currently loading the source twice, which is a waste. TODO: Move some stuff around open my $fh, '<:utf8', $sourceFilePath or die "Can't open file $!"; ## no critic (UTF8) $file = $sourceFilePath; $source = do { local $/; <$fh> }; $offset = 1; close($fh); } elsif ($INC{"lib_bs22/SourceStash.pm"}){ # Path run during the extension $source = $lib_bs22::SourceStash::source; $file = $lib_bs22::SourceStash::filename; $source = Encode::decode('utf-8', $source); $offset = 3; } else{ # Used for debugging the extension and shown to users in the log require File::Spec; # TODO: Adjust PLTags offset in this case. $file = File::Spec->rel2abs($0); open my $fh, '<:utf8', $file or die "Can't open file $!"; ## no critic (UTF8) $source = do { local $/; <$fh> }; $offset = 1; close($fh); } $source = "" if !defined($source); return ($source, $offset, $file); } sub tags_to_symbols { # Currently only used for testing. Turns an output of tags into a hash of symbol array, similiar to ParseDocument.ts my $tags = shift; my $symbols = {}; foreach my $tag_str (split("\n", $tags)){ my @pieces = split("\t", $tag_str, -1); if( scalar( @pieces ) == 7 ){ my ($tag, $type, $typeDetails, $file, $package_name, $line) = @pieces; $symbols->{$tag} = [] if !exists($symbols->{$tag}); push @{ $symbols->{$tag} }, {'type'=> $type, 'typeDetails' => $typeDetails, 'file'=>$file, 'package_name'=>$package_name, 'line'=>$line}; } } return $symbols; } 1;