# HG changeset patch # User thomas_-_s # Date 1211807104 0 # Node ID 0f116c3002d7a64a3d725bd1586434d7b8d31d44 # Parent 61ae1529b238279eb25c333f3e4741b0014d38b6 removed perltidy from svn. Hopefully ohloh will update itself correctly again. git-svn-id: svn://svn.code.sf.net/p/universalindent/code/trunk@706 59b1889a-e5ac-428c-b0c7-476e01d41282 diff -r 61ae1529b238 -r 0f116c3002d7 indenters/PerlTidyLib.pm --- a/indenters/PerlTidyLib.pm Mon May 26 12:26:19 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28266 +0,0 @@ -############################################################ -# -# perltidy - a perl script indenter and formatter -# -# Copyright (c) 2000-2007 by Steve Hancock -# Distributed under the GPL license agreement; see file COPYING -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# For brief instructions instructions, try 'perltidy -h'. -# For more complete documentation, try 'man perltidy' -# or visit http://perltidy.sourceforge.net -# -# This script is an example of the default style. It was formatted with: -# -# perltidy Tidy.pm -# -# Code Contributions: -# Michael Cartmell supplied code for adaptation to VMS and helped with -# v-strings. -# Hugh S. Myers supplied sub streamhandle and the supporting code to -# create a Perl::Tidy module which can operate on strings, arrays, etc. -# Yves Orton supplied coding to help detect Windows versions. -# Axel Rose supplied a patch for MacPerl. -# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. -# Dan Tyrell contributed a patch for binary I/O. -# Ueli Hugenschmidt contributed a patch for -fpsc -# Many others have supplied key ideas, suggestions, and bug reports; -# see the CHANGES file. -# -############################################################ - -package Perl::Tidy; -use 5.004; # need IO::File from 5.004 or later -BEGIN { $^W = 1; } # turn on warnings - -use strict; -use Exporter; -use Carp; -$|++; - -use vars qw{ - $VERSION - @ISA - @EXPORT - $missing_file_spec -}; - -@ISA = qw( Exporter ); -@EXPORT = qw( &perltidy ); - -use IO::File; -use File::Basename; - -BEGIN { - ( $VERSION = q($Id: Tidy.pm,v 1.73 2007/12/05 17:51:17 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker -} - -sub streamhandle { - - # given filename and mode (r or w), create an object which: - # has a 'getline' method if mode='r', and - # has a 'print' method if mode='w'. - # The objects also need a 'close' method. - # - # How the object is made: - # - # if $filename is: Make object using: - # ---------------- ----------------- - # '-' (STDIN if mode = 'r', STDOUT if mode='w') - # string IO::File - # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) - # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) - # object object - # (check for 'print' method for 'w' mode) - # (check for 'getline' method for 'r' mode) - my $ref = ref( my $filename = shift ); - my $mode = shift; - my $New; - my $fh; - - # handle a reference - if ($ref) { - if ( $ref eq 'ARRAY' ) { - $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; - } - elsif ( $ref eq 'SCALAR' ) { - $New = sub { Perl::Tidy::IOScalar->new(@_) }; - } - else { - - # Accept an object with a getline method for reading. Note: - # IO::File is built-in and does not respond to the defined - # operator. If this causes trouble, the check can be - # skipped and we can just let it crash if there is no - # getline. - if ( $mode =~ /[rR]/ ) { - if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { - $New = sub { $filename }; - } - else { - $New = sub { undef }; - confess <new(@_) }; - } - } - $fh = $New->( $filename, $mode ) - or warn "Couldn't open file:$filename in mode:$mode : $!\n"; - return $fh, ( $ref or $filename ); -} - -sub find_input_line_ending { - - # Peek at a file and return first line ending character. - # Quietly return undef in case of any trouble. - my ($input_file) = @_; - my $ending; - - # silently ignore input from object or stdin - if ( ref($input_file) || $input_file eq '-' ) { - return $ending; - } - open( INFILE, $input_file ) || return $ending; - - binmode INFILE; - my $buf; - read( INFILE, $buf, 1024 ); - close INFILE; - if ( $buf && $buf =~ /([\012\015]+)/ ) { - my $test = $1; - - # dos - if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } - - # mac - elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } - - # unix - elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } - - # unknown - else { } - } - - # no ending seen - else { } - - return $ending; -} - -sub catfile { - - # concatenate a path and file basename - # returns undef in case of error - - BEGIN { eval "require File::Spec"; $missing_file_spec = $@; } - - # use File::Spec if we can - unless ($missing_file_spec) { - return File::Spec->catfile(@_); - } - - # Perl 5.004 systems may not have File::Spec so we'll make - # a simple try. We assume File::Basename is available. - # return undef if not successful. - my $name = pop @_; - my $path = join '/', @_; - my $test_file = $path . $name; - my ( $test_name, $test_path ) = fileparse($test_file); - return $test_file if ( $test_name eq $name ); - return undef if ( $^O eq 'VMS' ); - - # this should work at least for Windows and Unix: - $test_file = $path . '/' . $name; - ( $test_name, $test_path ) = fileparse($test_file); - return $test_file if ( $test_name eq $name ); - return undef; -} - -sub make_temporary_filename { - - # Make a temporary filename. - # - # The POSIX tmpnam() function tends to be unreliable for non-unix - # systems (at least for the win32 systems that I've tested), so use - # a pre-defined name. A slight disadvantage of this is that two - # perltidy runs in the same working directory may conflict. - # However, the chance of that is small and managable by the user. - # An alternative would be to check for the file's existance and use, - # say .TMP0, .TMP1, etc, but that scheme has its own problems. So, - # keep it simple. - my $name = "perltidy.TMP"; - if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) { - return $name; - } - eval "use POSIX qw(tmpnam)"; - if ($@) { return $name } - use IO::File; - - # just make a couple of tries before giving up and using the default - for ( 0 .. 1 ) { - my $tmpname = tmpnam(); - my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL ); - if ($fh) { - $fh->close(); - return ($tmpname); - last; - } - } - return ($name); -} - -# Here is a map of the flow of data from the input source to the output -# line sink: -# -# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> -# input groups output -# lines tokens lines of lines lines -# lines -# -# The names correspond to the package names responsible for the unit processes. -# -# The overall process is controlled by the "main" package. -# -# LineSource is the stream of input lines -# -# Tokenizer analyzes a line and breaks it into tokens, peeking ahead -# if necessary. A token is any section of the input line which should be -# manipulated as a single entity during formatting. For example, a single -# ',' character is a token, and so is an entire side comment. It handles -# the complexities of Perl syntax, such as distinguishing between '<<' as -# a shift operator and as a here-document, or distinguishing between '/' -# as a divide symbol and as a pattern delimiter. -# -# Formatter inserts and deletes whitespace between tokens, and breaks -# sequences of tokens at appropriate points as output lines. It bases its -# decisions on the default rules as modified by any command-line options. -# -# VerticalAligner collects groups of lines together and tries to line up -# certain tokens, such as '=>', '#', and '=' by adding whitespace. -# -# FileWriter simply writes lines to the output stream. -# -# The Logger package, not shown, records significant events and warning -# messages. It writes a .LOG file, which may be saved with a -# '-log' or a '-g' flag. - -{ - - # variables needed by interrupt handler: - my $tokenizer; - my $input_file; - - # this routine may be called to give a status report if interrupted. If a - # parameter is given, it will call exit with that parameter. This is no - # longer used because it works under Unix but not under Windows. - sub interrupt_handler { - - my $exit_flag = shift; - print STDERR "perltidy interrupted"; - if ($tokenizer) { - my $input_line_number = - Perl::Tidy::Tokenizer::get_input_line_number(); - print STDERR " at line $input_line_number"; - } - if ($input_file) { - - if ( ref $input_file ) { print STDERR " of reference to:" } - else { print STDERR " of file:" } - print STDERR " $input_file"; - } - print STDERR "\n"; - exit $exit_flag if defined($exit_flag); - } - - sub perltidy { - - my %defaults = ( - argv => undef, - destination => undef, - formatter => undef, - logfile => undef, - errorfile => undef, - perltidyrc => undef, - source => undef, - stderr => undef, - dump_options => undef, - dump_options_type => undef, - dump_getopt_flags => undef, - dump_options_category => undef, - dump_options_range => undef, - dump_abbreviations => undef, - ); - - # don't overwrite callers ARGV - local @ARGV = @ARGV; - - my %input_hash = @_; - - if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { - local $" = ')('; - my @good_keys = sort keys %defaults; - @bad_keys = sort @bad_keys; - confess <('dump_options'); - my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); - my $dump_options_category = $get_hash_ref->('dump_options_category'); - my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); - my $dump_options_range = $get_hash_ref->('dump_options_range'); - - # validate dump_options_type - if ( defined($dump_options) ) { - unless ( defined($dump_options_type) ) { - $dump_options_type = 'perltidyrc'; - } - unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { - croak <new(); - } - - # see if ARGV is overridden - if ( defined($argv) ) { - - my $rargv = ref $argv; - if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } - - # ref to ARRAY - if ($rargv) { - if ( $rargv eq 'ARRAY' ) { - @ARGV = @$argv; - } - else { - croak <{$opt} = $flag; - } - } - - if ( defined($dump_options_category) ) { - $quit_now = 1; - %{$dump_options_category} = %{$roption_category}; - } - - if ( defined($dump_options_range) ) { - $quit_now = 1; - %{$dump_options_range} = %{$roption_range}; - } - - if ( defined($dump_abbreviations) ) { - $quit_now = 1; - %{$dump_abbreviations} = %{$rexpansion}; - } - - if ( defined($dump_options) ) { - $quit_now = 1; - %{$dump_options} = %{$rOpts}; - } - - return if ($quit_now); - - # make printable string of options for this run as possible diagnostic - my $readable_options = readable_options( $rOpts, $roption_string ); - - # dump from command line - if ( $rOpts->{'dump-options'} ) { - print STDOUT $readable_options; - exit 1; - } - - check_options( $rOpts, $is_Windows, $Windows_type, - $rpending_complaint ); - - if ($user_formatter) { - $rOpts->{'format'} = 'user'; - } - - # there must be one entry here for every possible format - my %default_file_extension = ( - tidy => 'tdy', - html => 'html', - user => '', - ); - - # be sure we have a valid output format - unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { - my $formats = join ' ', - sort map { "'" . $_ . "'" } keys %default_file_extension; - my $fmt = $rOpts->{'format'}; - die "-format='$fmt' but must be one of: $formats\n"; - } - - my $output_extension = - make_extension( $rOpts->{'output-file-extension'}, - $default_file_extension{ $rOpts->{'format'} }, $dot ); - - my $backup_extension = - make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); - - my $html_toc_extension = - make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); - - my $html_src_extension = - make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); - - # check for -b option; - my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} - && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode - && @ARGV > 0; # silently ignore if standard input; - # this allows -b to be in a .perltidyrc file - # without error messages when running from an editor - - # turn off -b with warnings in case of conflicts with other options - if ($in_place_modify) { - if ( $rOpts->{'standard-output'} ) { - warn "Ignoring -b; you may not use -b and -st together\n"; - $in_place_modify = 0; - } - if ($destination_stream) { - warn -"Ignoring -b; you may not specify a destination array and -b together\n"; - $in_place_modify = 0; - } - if ($source_stream) { - warn -"Ignoring -b; you may not specify a source array and -b together\n"; - $in_place_modify = 0; - } - if ( $rOpts->{'outfile'} ) { - warn "Ignoring -b; you may not use -b and -o together\n"; - $in_place_modify = 0; - } - if ( defined( $rOpts->{'output-path'} ) ) { - warn "Ignoring -b; you may not use -b and -opath together\n"; - $in_place_modify = 0; - } - } - - Perl::Tidy::Formatter::check_options($rOpts); - if ( $rOpts->{'format'} eq 'html' ) { - Perl::Tidy::HtmlWriter->check_options($rOpts); - } - - # make the pattern of file extensions that we shouldn't touch - my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; - if ($output_extension) { - my $ext = quotemeta($output_extension); - $forbidden_file_extensions .= "|$ext"; - } - if ( $in_place_modify && $backup_extension ) { - my $ext = quotemeta($backup_extension); - $forbidden_file_extensions .= "|$ext"; - } - $forbidden_file_extensions .= ')$'; - - # Create a diagnostics object if requested; - # This is only useful for code development - my $diagnostics_object = undef; - if ( $rOpts->{'DIAGNOSTICS'} ) { - $diagnostics_object = Perl::Tidy::Diagnostics->new(); - } - - # no filenames should be given if input is from an array - if ($source_stream) { - if ( @ARGV > 0 ) { - die -"You may not specify any filenames when a source array is given\n"; - } - - # we'll stuff the source array into ARGV - unshift( @ARGV, $source_stream ); - - # No special treatment for source stream which is a filename. - # This will enable checks for binary files and other bad stuff. - $source_stream = undef unless ref($source_stream); - } - - # use stdin by default if no source array and no args - else { - unshift( @ARGV, '-' ) unless @ARGV; - } - - # loop to process all files in argument list - my $number_of_files = @ARGV; - my $formatter = undef; - $tokenizer = undef; - while ( $input_file = shift @ARGV ) { - my $fileroot; - my $input_file_permissions; - - #--------------------------------------------------------------- - # determine the input file name - #--------------------------------------------------------------- - if ($source_stream) { - $fileroot = "perltidy"; - } - elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN - $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc - $in_place_modify = 0; - } - else { - $fileroot = $input_file; - unless ( -e $input_file ) { - - # file doesn't exist - check for a file glob - if ( $input_file =~ /([\?\*\[\{])/ ) { - - # Windows shell may not remove quotes, so do it - my $input_file = $input_file; - if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } - if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } - my $pattern = fileglob_to_re($input_file); - eval "/$pattern/"; - if ( !$@ && opendir( DIR, './' ) ) { - my @files = - grep { /$pattern/ && !-d $_ } readdir(DIR); - closedir(DIR); - if (@files) { - unshift @ARGV, @files; - next; - } - } - } - print "skipping file: '$input_file': no matches found\n"; - next; - } - - unless ( -f $input_file ) { - print "skipping file: $input_file: not a regular file\n"; - next; - } - - unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { - print -"skipping file: $input_file: Non-text (override with -f)\n"; - next; - } - - # we should have a valid filename now - $fileroot = $input_file; - $input_file_permissions = ( stat $input_file )[2] & 07777; - - if ( $^O eq 'VMS' ) { - ( $fileroot, $dot ) = check_vms_filename($fileroot); - } - - # add option to change path here - if ( defined( $rOpts->{'output-path'} ) ) { - - my ( $base, $old_path ) = fileparse($fileroot); - my $new_path = $rOpts->{'output-path'}; - unless ( -d $new_path ) { - unless ( mkdir $new_path, 0777 ) { - die "unable to create directory $new_path: $!\n"; - } - } - my $path = $new_path; - $fileroot = catfile( $path, $base ); - unless ($fileroot) { - die <new( $input_file, $rOpts, - $rpending_logfile_message ); - next unless ($source_object); - - # register this file name with the Diagnostics package - $diagnostics_object->set_input_file($input_file) - if $diagnostics_object; - - #--------------------------------------------------------------- - # determine the output file name - #--------------------------------------------------------------- - my $output_file = undef; - my $actual_output_extension; - - if ( $rOpts->{'outfile'} ) { - - if ( $number_of_files <= 1 ) { - - if ( $rOpts->{'standard-output'} ) { - die "You may not use -o and -st together\n"; - } - elsif ($destination_stream) { - die -"You may not specify a destination array and -o together\n"; - } - elsif ( defined( $rOpts->{'output-path'} ) ) { - die "You may not specify -o and -opath together\n"; - } - elsif ( defined( $rOpts->{'output-file-extension'} ) ) { - die "You may not specify -o and -oext together\n"; - } - $output_file = $rOpts->{outfile}; - - # make sure user gives a file name after -o - if ( $output_file =~ /^-/ ) { - die "You must specify a valid filename after -o\n"; - } - - # do not overwrite input file with -o - if ( defined($input_file_permissions) - && ( $output_file eq $input_file ) ) - { - die - "Use 'perltidy -b $input_file' to modify in-place\n"; - } - } - else { - die "You may not use -o with more than one input file\n"; - } - } - elsif ( $rOpts->{'standard-output'} ) { - if ($destination_stream) { - die -"You may not specify a destination array and -st together\n"; - } - $output_file = '-'; - - if ( $number_of_files <= 1 ) { - } - else { - die "You may not use -st with more than one input file\n"; - } - } - elsif ($destination_stream) { - $output_file = $destination_stream; - } - elsif ($source_stream) { # source but no destination goes to stdout - $output_file = '-'; - } - elsif ( $input_file eq '-' ) { - $output_file = '-'; - } - else { - if ($in_place_modify) { - $output_file = IO::File->new_tmpfile() - or die "cannot open temp file for -b option: $!\n"; - } - else { - $actual_output_extension = $output_extension; - $output_file = $fileroot . $output_extension; - } - } - - # the 'sink_object' knows how to write the output file - my $tee_file = $fileroot . $dot . "TEE"; - - my $line_separator = $rOpts->{'output-line-ending'}; - if ( $rOpts->{'preserve-line-endings'} ) { - $line_separator = find_input_line_ending($input_file); - } - - # Eventually all I/O may be done with binmode, but for now it is - # only done when a user requests a particular line separator - # through the -ple or -ole flags - my $binmode = 0; - if ( defined($line_separator) ) { $binmode = 1 } - else { $line_separator = "\n" } - - my $sink_object = - Perl::Tidy::LineSink->new( $output_file, $tee_file, - $line_separator, $rOpts, $rpending_logfile_message, $binmode ); - - #--------------------------------------------------------------- - # initialize the error logger - #--------------------------------------------------------------- - my $warning_file = $fileroot . $dot . "ERR"; - if ($errorfile_stream) { $warning_file = $errorfile_stream } - my $log_file = $fileroot . $dot . "LOG"; - if ($logfile_stream) { $log_file = $logfile_stream } - - my $logger_object = - Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, - $saw_extrude ); - write_logfile_header( - $rOpts, $logger_object, $config_file, - $rraw_options, $Windows_type, $readable_options, - ); - if ($$rpending_logfile_message) { - $logger_object->write_logfile_entry($$rpending_logfile_message); - } - if ($$rpending_complaint) { - $logger_object->complain($$rpending_complaint); - } - - #--------------------------------------------------------------- - # initialize the debug object, if any - #--------------------------------------------------------------- - my $debugger_object = undef; - if ( $rOpts->{DEBUG} ) { - $debugger_object = - Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); - } - - #--------------------------------------------------------------- - # create a formatter for this file : html writer or pretty printer - #--------------------------------------------------------------- - - # we have to delete any old formatter because, for safety, - # the formatter will check to see that there is only one. - $formatter = undef; - - if ($user_formatter) { - $formatter = $user_formatter; - } - elsif ( $rOpts->{'format'} eq 'html' ) { - $formatter = - Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, - $actual_output_extension, $html_toc_extension, - $html_src_extension ); - } - elsif ( $rOpts->{'format'} eq 'tidy' ) { - $formatter = Perl::Tidy::Formatter->new( - logger_object => $logger_object, - diagnostics_object => $diagnostics_object, - sink_object => $sink_object, - ); - } - else { - die "I don't know how to do -format=$rOpts->{'format'}\n"; - } - - unless ($formatter) { - die "Unable to continue with $rOpts->{'format'} formatting\n"; - } - - #--------------------------------------------------------------- - # create the tokenizer for this file - #--------------------------------------------------------------- - $tokenizer = undef; # must destroy old tokenizer - $tokenizer = Perl::Tidy::Tokenizer->new( - source_object => $source_object, - logger_object => $logger_object, - debugger_object => $debugger_object, - diagnostics_object => $diagnostics_object, - starting_level => $rOpts->{'starting-indentation-level'}, - tabs => $rOpts->{'tabs'}, - indent_columns => $rOpts->{'indent-columns'}, - look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, - look_for_autoloader => $rOpts->{'look-for-autoloader'}, - look_for_selfloader => $rOpts->{'look-for-selfloader'}, - trim_qw => $rOpts->{'trim-qw'}, - ); - - #--------------------------------------------------------------- - # now we can do it - #--------------------------------------------------------------- - process_this_file( $tokenizer, $formatter ); - - #--------------------------------------------------------------- - # close the input source and report errors - #--------------------------------------------------------------- - $source_object->close_input_file(); - - # get file names to use for syntax check - my $ifname = $source_object->get_input_file_copy_name(); - my $ofname = $sink_object->get_output_file_copy(); - - #--------------------------------------------------------------- - # handle the -b option (backup and modify in-place) - #--------------------------------------------------------------- - if ($in_place_modify) { - unless ( -f $input_file ) { - - # oh, oh, no real file to backup .. - # shouldn't happen because of numerous preliminary checks - die print -"problem with -b backing up input file '$input_file': not a file\n"; - } - my $backup_name = $input_file . $backup_extension; - if ( -f $backup_name ) { - unlink($backup_name) - or die -"unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; - } - rename( $input_file, $backup_name ) - or die -"problem renaming $input_file to $backup_name for -b option: $!\n"; - $ifname = $backup_name; - - seek( $output_file, 0, 0 ) - or die "unable to rewind tmp file for -b option: $!\n"; - - my $fout = IO::File->new("> $input_file") - or die -"problem opening $input_file for write for -b option; check directory permissions: $!\n"; - binmode $fout; - my $line; - while ( $line = $output_file->getline() ) { - $fout->print($line); - } - $fout->close(); - $output_file = $input_file; - $ofname = $input_file; - } - - #--------------------------------------------------------------- - # clean up and report errors - #--------------------------------------------------------------- - $sink_object->close_output_file() if $sink_object; - $debugger_object->close_debug_file() if $debugger_object; - - my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes - if ($output_file) { - - if ($input_file_permissions) { - - # give output script same permissions as input script, but - # make it user-writable or else we can't run perltidy again. - # Thus we retain whatever executable flags were set. - if ( $rOpts->{'format'} eq 'tidy' ) { - chmod( $input_file_permissions | 0600, $output_file ); - } - - # else use default permissions for html and any other format - - } - if ( $logger_object && $rOpts->{'check-syntax'} ) { - $infile_syntax_ok = - check_syntax( $ifname, $ofname, $logger_object, $rOpts ); - } - } - - $logger_object->finish( $infile_syntax_ok, $formatter ) - if $logger_object; - } # end of loop to process all files - } # end of main program -} - -sub fileglob_to_re { - - # modified (corrected) from version in find2perl - my $x = shift; - $x =~ s#([./^\$()])#\\$1#g; # escape special characters - $x =~ s#\*#.*#g; # '*' -> '.*' - $x =~ s#\?#.#g; # '?' -> '.' - "^$x\\z"; # match whole word -} - -sub make_extension { - - # Make a file extension, including any leading '.' if necessary - # The '.' may actually be an '_' under VMS - my ( $extension, $default, $dot ) = @_; - - # Use the default if none specified - $extension = $default unless ($extension); - - # Only extensions with these leading characters get a '.' - # This rule gives the user some freedom - if ( $extension =~ /^[a-zA-Z0-9]/ ) { - $extension = $dot . $extension; - } - return $extension; -} - -sub write_logfile_header { - my ( - $rOpts, $logger_object, $config_file, - $rraw_options, $Windows_type, $readable_options - ) = @_; - $logger_object->write_logfile_entry( -"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" - ); - if ($Windows_type) { - $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); - } - my $options_string = join( ' ', @$rraw_options ); - - if ($config_file) { - $logger_object->write_logfile_entry( - "Found Configuration File >>> $config_file \n"); - } - $logger_object->write_logfile_entry( - "Configuration and command line parameters for this run:\n"); - $logger_object->write_logfile_entry("$options_string\n"); - - if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { - $rOpts->{'logfile'} = 1; # force logfile to be saved - $logger_object->write_logfile_entry( - "Final parameter set for this run\n"); - $logger_object->write_logfile_entry( - "------------------------------------\n"); - - $logger_object->write_logfile_entry($readable_options); - - $logger_object->write_logfile_entry( - "------------------------------------\n"); - } - $logger_object->write_logfile_entry( - "To find error messages search for 'WARNING' with your editor\n"); -} - -sub generate_options { - - ###################################################################### - # Generate and return references to: - # @option_string - the list of options to be passed to Getopt::Long - # @defaults - the list of default options - # %expansion - a hash showing how all abbreviations are expanded - # %category - a hash giving the general category of each option - # %option_range - a hash giving the valid ranges of certain options - - # Note: a few options are not documented in the man page and usage - # message. This is because these are experimental or debug options and - # may or may not be retained in future versions. - # - # Here are the undocumented flags as far as I know. Any of them - # may disappear at any time. They are mainly for fine-tuning - # and debugging. - # - # fll --> fuzzy-line-length # a trivial parameter which gets - # turned off for the extrude option - # which is mainly for debugging - # chk --> check-multiline-quotes # check for old bug; to be deleted - # scl --> short-concatenation-item-length # helps break at '.' - # recombine # for debugging line breaks - # valign # for debugging vertical alignment - # I --> DIAGNOSTICS # for debugging - ###################################################################### - - # here is a summary of the Getopt codes: - # does not take an argument - # =s takes a mandatory string - # :s takes an optional string (DO NOT USE - filenames will get eaten up) - # =i takes a mandatory integer - # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) - # ! does not take an argument and may be negated - # i.e., -foo and -nofoo are allowed - # a double dash signals the end of the options list - # - #--------------------------------------------------------------- - # Define the option string passed to GetOptions. - #--------------------------------------------------------------- - - my @option_string = (); - my %expansion = (); - my %option_category = (); - my %option_range = (); - my $rexpansion = \%expansion; - - # names of categories in manual - # leading integers will allow sorting - my @category_name = ( - '0. I/O control', - '1. Basic formatting options', - '2. Code indentation control', - '3. Whitespace control', - '4. Comment controls', - '5. Linebreak controls', - '6. Controlling list formatting', - '7. Retaining or ignoring existing line breaks', - '8. Blank line control', - '9. Other controls', - '10. HTML options', - '11. pod2html options', - '12. Controlling HTML properties', - '13. Debugging', - ); - - # These options are parsed directly by perltidy: - # help h - # version v - # However, they are included in the option set so that they will - # be seen in the options dump. - - # These long option names have no abbreviations or are treated specially - @option_string = qw( - html! - noprofile - no-profile - npro - recombine! - valign! - ); - - my $category = 13; # Debugging - foreach (@option_string) { - my $opt = $_; # must avoid changing the actual flag - $opt =~ s/!$//; - $option_category{$opt} = $category_name[$category]; - } - - $category = 11; # HTML - $option_category{html} = $category_name[$category]; - - # routine to install and check options - my $add_option = sub { - my ( $long_name, $short_name, $flag ) = @_; - push @option_string, $long_name . $flag; - $option_category{$long_name} = $category_name[$category]; - if ($short_name) { - if ( $expansion{$short_name} ) { - my $existing_name = $expansion{$short_name}[0]; - die -"redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; - } - $expansion{$short_name} = [$long_name]; - if ( $flag eq '!' ) { - my $nshort_name = 'n' . $short_name; - my $nolong_name = 'no' . $long_name; - if ( $expansion{$nshort_name} ) { - my $existing_name = $expansion{$nshort_name}[0]; - die -"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; - } - $expansion{$nshort_name} = [$nolong_name]; - } - } - }; - - # Install long option names which have a simple abbreviation. - # Options with code '!' get standard negation ('no' for long names, - # 'n' for abbreviations). Categories follow the manual. - - ########################### - $category = 0; # I/O_Control - ########################### - $add_option->( 'backup-and-modify-in-place', 'b', '!' ); - $add_option->( 'backup-file-extension', 'bext', '=s' ); - $add_option->( 'force-read-binary', 'f', '!' ); - $add_option->( 'format', 'fmt', '=s' ); - $add_option->( 'logfile', 'log', '!' ); - $add_option->( 'logfile-gap', 'g', ':i' ); - $add_option->( 'outfile', 'o', '=s' ); - $add_option->( 'output-file-extension', 'oext', '=s' ); - $add_option->( 'output-path', 'opath', '=s' ); - $add_option->( 'profile', 'pro', '=s' ); - $add_option->( 'quiet', 'q', '!' ); - $add_option->( 'standard-error-output', 'se', '!' ); - $add_option->( 'standard-output', 'st', '!' ); - $add_option->( 'warning-output', 'w', '!' ); - - # options which are both toggle switches and values moved here - # to hide from tidyview (which does not show category 0 flags): - # -ole moved here from category 1 - # -sil moved here from category 2 - $add_option->( 'output-line-ending', 'ole', '=s' ); - $add_option->( 'starting-indentation-level', 'sil', '=i' ); - - ######################################## - $category = 1; # Basic formatting options - ######################################## - $add_option->( 'check-syntax', 'syn', '!' ); - $add_option->( 'entab-leading-whitespace', 'et', '=i' ); - $add_option->( 'indent-columns', 'i', '=i' ); - $add_option->( 'maximum-line-length', 'l', '=i' ); - $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); - $add_option->( 'preserve-line-endings', 'ple', '!' ); - $add_option->( 'tabs', 't', '!' ); - - ######################################## - $category = 2; # Code indentation control - ######################################## - $add_option->( 'continuation-indentation', 'ci', '=i' ); - $add_option->( 'line-up-parentheses', 'lp', '!' ); - $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); - $add_option->( 'outdent-keywords', 'okw', '!' ); - $add_option->( 'outdent-labels', 'ola', '!' ); - $add_option->( 'outdent-long-quotes', 'olq', '!' ); - $add_option->( 'indent-closing-brace', 'icb', '!' ); - $add_option->( 'closing-token-indentation', 'cti', '=i' ); - $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); - $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); - $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); - $add_option->( 'brace-left-and-indent', 'bli', '!' ); - $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); - - ######################################## - $category = 3; # Whitespace control - ######################################## - $add_option->( 'add-semicolons', 'asc', '!' ); - $add_option->( 'add-whitespace', 'aws', '!' ); - $add_option->( 'block-brace-tightness', 'bbt', '=i' ); - $add_option->( 'brace-tightness', 'bt', '=i' ); - $add_option->( 'delete-old-whitespace', 'dws', '!' ); - $add_option->( 'delete-semicolons', 'dsm', '!' ); - $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); - $add_option->( 'nowant-left-space', 'nwls', '=s' ); - $add_option->( 'nowant-right-space', 'nwrs', '=s' ); - $add_option->( 'paren-tightness', 'pt', '=i' ); - $add_option->( 'space-after-keyword', 'sak', '=s' ); - $add_option->( 'space-for-semicolon', 'sfs', '!' ); - $add_option->( 'space-function-paren', 'sfp', '!' ); - $add_option->( 'space-keyword-paren', 'skp', '!' ); - $add_option->( 'space-terminal-semicolon', 'sts', '!' ); - $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); - $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); - $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); - $add_option->( 'trim-qw', 'tqw', '!' ); - $add_option->( 'want-left-space', 'wls', '=s' ); - $add_option->( 'want-right-space', 'wrs', '=s' ); - - ######################################## - $category = 4; # Comment controls - ######################################## - $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); - $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); - $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); - $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); - $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); - $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); - $add_option->( 'closing-side-comments', 'csc', '!' ); - $add_option->( 'format-skipping', 'fs', '!' ); - $add_option->( 'format-skipping-begin', 'fsb', '=s' ); - $add_option->( 'format-skipping-end', 'fse', '=s' ); - $add_option->( 'hanging-side-comments', 'hsc', '!' ); - $add_option->( 'indent-block-comments', 'ibc', '!' ); - $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); - $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); - $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); - $add_option->( 'outdent-long-comments', 'olc', '!' ); - $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); - $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); - $add_option->( 'static-block-comments', 'sbc', '!' ); - $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); - $add_option->( 'static-side-comments', 'ssc', '!' ); - - ######################################## - $category = 5; # Linebreak controls - ######################################## - $add_option->( 'add-newlines', 'anl', '!' ); - $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); - $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); - $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); - $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); - $add_option->( 'cuddled-else', 'ce', '!' ); - $add_option->( 'delete-old-newlines', 'dnl', '!' ); - $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); - $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); - $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); - $add_option->( 'opening-paren-right', 'opr', '!' ); - $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); - $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); - $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); - $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); - $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); - $add_option->( 'stack-closing-paren', 'scp', '!' ); - $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); - $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); - $add_option->( 'stack-opening-paren', 'sop', '!' ); - $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); - $add_option->( 'vertical-tightness', 'vt', '=i' ); - $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); - $add_option->( 'want-break-after', 'wba', '=s' ); - $add_option->( 'want-break-before', 'wbb', '=s' ); - $add_option->( 'break-after-all-operators', 'baao', '!' ); - $add_option->( 'break-before-all-operators', 'bbao', '!' ); - $add_option->( 'keep-interior-semicolons', 'kis', '!' ); - - ######################################## - $category = 6; # Controlling list formatting - ######################################## - $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); - $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); - $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); - - ######################################## - $category = 7; # Retaining or ignoring existing line breaks - ######################################## - $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); - $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); - $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); - $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); - - ######################################## - $category = 8; # Blank line control - ######################################## - $add_option->( 'blanks-before-blocks', 'bbb', '!' ); - $add_option->( 'blanks-before-comments', 'bbc', '!' ); - $add_option->( 'blanks-before-subs', 'bbs', '!' ); - $add_option->( 'long-block-line-count', 'lbl', '=i' ); - $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); - $add_option->( 'swallow-optional-blank-lines', 'sob', '!' ); - - ######################################## - $category = 9; # Other controls - ######################################## - $add_option->( 'delete-block-comments', 'dbc', '!' ); - $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); - $add_option->( 'delete-pod', 'dp', '!' ); - $add_option->( 'delete-side-comments', 'dsc', '!' ); - $add_option->( 'tee-block-comments', 'tbc', '!' ); - $add_option->( 'tee-pod', 'tp', '!' ); - $add_option->( 'tee-side-comments', 'tsc', '!' ); - $add_option->( 'look-for-autoloader', 'lal', '!' ); - $add_option->( 'look-for-hash-bang', 'x', '!' ); - $add_option->( 'look-for-selfloader', 'lsl', '!' ); - $add_option->( 'pass-version-line', 'pvl', '!' ); - - ######################################## - $category = 13; # Debugging - ######################################## - $add_option->( 'DEBUG', 'D', '!' ); - $add_option->( 'DIAGNOSTICS', 'I', '!' ); - $add_option->( 'check-multiline-quotes', 'chk', '!' ); - $add_option->( 'dump-defaults', 'ddf', '!' ); - $add_option->( 'dump-long-names', 'dln', '!' ); - $add_option->( 'dump-options', 'dop', '!' ); - $add_option->( 'dump-profile', 'dpro', '!' ); - $add_option->( 'dump-short-names', 'dsn', '!' ); - $add_option->( 'dump-token-types', 'dtt', '!' ); - $add_option->( 'dump-want-left-space', 'dwls', '!' ); - $add_option->( 'dump-want-right-space', 'dwrs', '!' ); - $add_option->( 'fuzzy-line-length', 'fll', '!' ); - $add_option->( 'help', 'h', '' ); - $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); - $add_option->( 'show-options', 'opt', '!' ); - $add_option->( 'version', 'v', '' ); - - #--------------------------------------------------------------------- - - # The Perl::Tidy::HtmlWriter will add its own options to the string - Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); - - ######################################## - # Set categories 10, 11, 12 - ######################################## - # Based on their known order - $category = 12; # HTML properties - foreach my $opt (@option_string) { - my $long_name = $opt; - $long_name =~ s/(!|=.*|:.*)$//; - unless ( defined( $option_category{$long_name} ) ) { - if ( $long_name =~ /^html-linked/ ) { - $category = 10; # HTML options - } - elsif ( $long_name =~ /^pod2html/ ) { - $category = 11; # Pod2html - } - $option_category{$long_name} = $category_name[$category]; - } - } - - #--------------------------------------------------------------- - # Assign valid ranges to certain options - #--------------------------------------------------------------- - # In the future, these may be used to make preliminary checks - # hash keys are long names - # If key or value is undefined: - # strings may have any value - # integer ranges are >=0 - # If value is defined: - # value is [qw(any valid words)] for strings - # value is [min, max] for integers - # if min is undefined, there is no lower limit - # if max is undefined, there is no upper limit - # Parameters not listed here have defaults - %option_range = ( - 'format' => [ 'tidy', 'html', 'user' ], - 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], - - 'block-brace-tightness' => [ 0, 2 ], - 'brace-tightness' => [ 0, 2 ], - 'paren-tightness' => [ 0, 2 ], - 'square-bracket-tightness' => [ 0, 2 ], - - 'block-brace-vertical-tightness' => [ 0, 2 ], - 'brace-vertical-tightness' => [ 0, 2 ], - 'brace-vertical-tightness-closing' => [ 0, 2 ], - 'paren-vertical-tightness' => [ 0, 2 ], - 'paren-vertical-tightness-closing' => [ 0, 2 ], - 'square-bracket-vertical-tightness' => [ 0, 2 ], - 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], - 'vertical-tightness' => [ 0, 2 ], - 'vertical-tightness-closing' => [ 0, 2 ], - - 'closing-brace-indentation' => [ 0, 3 ], - 'closing-paren-indentation' => [ 0, 3 ], - 'closing-square-bracket-indentation' => [ 0, 3 ], - 'closing-token-indentation' => [ 0, 3 ], - - 'closing-side-comment-else-flag' => [ 0, 2 ], - 'comma-arrow-breakpoints' => [ 0, 3 ], - ); - - # Note: we could actually allow negative ci if someone really wants it: - # $option_range{'continuation-indentation'} = [ undef, undef ]; - - #--------------------------------------------------------------- - # Assign default values to the above options here, except - # for 'outfile' and 'help'. - # These settings should approximate the perlstyle(1) suggestions. - #--------------------------------------------------------------- - my @defaults = qw( - add-newlines - add-semicolons - add-whitespace - blanks-before-blocks - blanks-before-comments - blanks-before-subs - block-brace-tightness=0 - block-brace-vertical-tightness=0 - brace-tightness=1 - brace-vertical-tightness-closing=0 - brace-vertical-tightness=0 - break-at-old-logical-breakpoints - break-at-old-ternary-breakpoints - break-at-old-keyword-breakpoints - comma-arrow-breakpoints=1 - nocheck-syntax - closing-side-comment-interval=6 - closing-side-comment-maximum-text=20 - closing-side-comment-else-flag=0 - closing-paren-indentation=0 - closing-brace-indentation=0 - closing-square-bracket-indentation=0 - continuation-indentation=2 - delete-old-newlines - delete-semicolons - fuzzy-line-length - hanging-side-comments - indent-block-comments - indent-columns=4 - long-block-line-count=8 - look-for-autoloader - look-for-selfloader - maximum-consecutive-blank-lines=1 - maximum-fields-per-table=0 - maximum-line-length=80 - minimum-space-to-comment=4 - nobrace-left-and-indent - nocuddled-else - nodelete-old-whitespace - nohtml - nologfile - noquiet - noshow-options - nostatic-side-comments - noswallow-optional-blank-lines - notabs - nowarning-output - outdent-labels - outdent-long-quotes - outdent-long-comments - paren-tightness=1 - paren-vertical-tightness-closing=0 - paren-vertical-tightness=0 - pass-version-line - recombine - valign - short-concatenation-item-length=8 - space-for-semicolon - square-bracket-tightness=1 - square-bracket-vertical-tightness-closing=0 - square-bracket-vertical-tightness=0 - static-block-comments - trim-qw - format=tidy - backup-file-extension=bak - format-skipping - - pod2html - html-table-of-contents - html-entities - ); - - push @defaults, "perl-syntax-check-flags=-c -T"; - - #--------------------------------------------------------------- - # Define abbreviations which will be expanded into the above primitives. - # These may be defined recursively. - #--------------------------------------------------------------- - %expansion = ( - %expansion, - 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], - 'fnl' => [qw(freeze-newlines)], - 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], - 'fws' => [qw(freeze-whitespace)], - 'indent-only' => [qw(freeze-newlines freeze-whitespace)], - 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], - 'nooutdent-long-lines' => - [qw(nooutdent-long-quotes nooutdent-long-comments)], - 'noll' => [qw(nooutdent-long-lines)], - 'io' => [qw(indent-only)], - 'delete-all-comments' => - [qw(delete-block-comments delete-side-comments delete-pod)], - 'nodelete-all-comments' => - [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], - 'dac' => [qw(delete-all-comments)], - 'ndac' => [qw(nodelete-all-comments)], - 'gnu' => [qw(gnu-style)], - 'pbp' => [qw(perl-best-practices)], - 'tee-all-comments' => - [qw(tee-block-comments tee-side-comments tee-pod)], - 'notee-all-comments' => - [qw(notee-block-comments notee-side-comments notee-pod)], - 'tac' => [qw(tee-all-comments)], - 'ntac' => [qw(notee-all-comments)], - 'html' => [qw(format=html)], - 'nhtml' => [qw(format=tidy)], - 'tidy' => [qw(format=tidy)], - - 'break-after-comma-arrows' => [qw(cab=0)], - 'nobreak-after-comma-arrows' => [qw(cab=1)], - 'baa' => [qw(cab=0)], - 'nbaa' => [qw(cab=1)], - - 'break-at-old-trinary-breakpoints' => [qw(bot)], - - 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], - 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], - 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], - 'icp' => [qw(cpi=2 cbi=2 csbi=2)], - 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], - - 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], - 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], - 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], - 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], - 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], - - 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], - 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], - 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], - - 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], - 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], - 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], - - 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], - 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], - 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], - - 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], - 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], - 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], - - 'otr' => [qw(opr ohbr osbr)], - 'opening-token-right' => [qw(opr ohbr osbr)], - 'notr' => [qw(nopr nohbr nosbr)], - 'noopening-token-right' => [qw(nopr nohbr nosbr)], - - 'sot' => [qw(sop sohb sosb)], - 'nsot' => [qw(nsop nsohb nsosb)], - 'stack-opening-tokens' => [qw(sop sohb sosb)], - 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], - - 'sct' => [qw(scp schb scsb)], - 'stack-closing-tokens' => => [qw(scp schb scsb)], - 'nsct' => [qw(nscp nschb nscsb)], - 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], - - # 'mangle' originally deleted pod and comments, but to keep it - # reversible, it no longer does. But if you really want to - # delete them, just use: - # -mangle -dac - - # An interesting use for 'mangle' is to do this: - # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new - # which will form as many one-line blocks as possible - - 'mangle' => [ - qw( - check-syntax - delete-old-newlines - delete-old-whitespace - delete-semicolons - indent-columns=0 - maximum-consecutive-blank-lines=0 - maximum-line-length=100000 - noadd-newlines - noadd-semicolons - noadd-whitespace - noblanks-before-blocks - noblanks-before-subs - notabs - ) - ], - - # 'extrude' originally deleted pod and comments, but to keep it - # reversible, it no longer does. But if you really want to - # delete them, just use - # extrude -dac - # - # An interesting use for 'extrude' is to do this: - # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new - # which will break up all one-line blocks. - - 'extrude' => [ - qw( - check-syntax - ci=0 - delete-old-newlines - delete-old-whitespace - delete-semicolons - indent-columns=0 - maximum-consecutive-blank-lines=0 - maximum-line-length=1 - noadd-semicolons - noadd-whitespace - noblanks-before-blocks - noblanks-before-subs - nofuzzy-line-length - notabs - norecombine - ) - ], - - # this style tries to follow the GNU Coding Standards (which do - # not really apply to perl but which are followed by some perl - # programmers). - 'gnu-style' => [ - qw( - lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 - ) - ], - - # Style suggested in Damian Conway's Perl Best Practices - 'perl-best-practices' => [ - qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), -q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) - ], - - # Additional styles can be added here - ); - - Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); - - # Uncomment next line to dump all expansions for debugging: - # dump_short_names(\%expansion); - return ( - \@option_string, \@defaults, \%expansion, - \%option_category, \%option_range - ); - -} # end of generate_options - -sub process_command_line { - - my ( - $perltidyrc_stream, $is_Windows, $Windows_type, - $rpending_complaint, $dump_options_type - ) = @_; - - use Getopt::Long; - - my ( - $roption_string, $rdefaults, $rexpansion, - $roption_category, $roption_range - ) = generate_options(); - - #--------------------------------------------------------------- - # set the defaults by passing the above list through GetOptions - #--------------------------------------------------------------- - my %Opts = (); - { - local @ARGV; - my $i; - - # do not load the defaults if we are just dumping perltidyrc - unless ( $dump_options_type eq 'perltidyrc' ) { - for $i (@$rdefaults) { push @ARGV, "--" . $i } - } - - # Patch to save users Getopt::Long configuration - # and set to Getopt::Long defaults. Use eval to avoid - # breaking old versions of Perl without these routines. - my $glc; - eval { $glc = Getopt::Long::Configure() }; - unless ($@) { - eval { Getopt::Long::ConfigDefaults() }; - } - else { $glc = undef } - - if ( !GetOptions( \%Opts, @$roption_string ) ) { - die "Programming Bug: error in setting default options"; - } - - # Patch to put the previous Getopt::Long configuration back - eval { Getopt::Long::Configure($glc) } if defined $glc; - } - - my $word; - my @raw_options = (); - my $config_file = ""; - my $saw_ignore_profile = 0; - my $saw_extrude = 0; - my $saw_dump_profile = 0; - my $i; - - #--------------------------------------------------------------- - # Take a first look at the command-line parameters. Do as many - # immediate dumps as possible, which can avoid confusion if the - # perltidyrc file has an error. - #--------------------------------------------------------------- - foreach $i (@ARGV) { - - $i =~ s/^--/-/; - if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { - $saw_ignore_profile = 1; - } - - # note: this must come before -pro and -profile, below: - elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { - $saw_dump_profile = 1; - } - elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { - if ($config_file) { - warn -"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; - } - $config_file = $2; - unless ( -e $config_file ) { - warn "cannot find file given with -pro=$config_file: $!\n"; - $config_file = ""; - } - } - elsif ( $i =~ /^-(pro|profile)=?$/ ) { - die "usage: -pro=filename or --profile=filename, no spaces\n"; - } - elsif ( $i =~ /^-extrude$/ ) { - $saw_extrude = 1; - } - elsif ( $i =~ /^-(help|h|HELP|H)$/ ) { - usage(); - exit 1; - } - elsif ( $i =~ /^-(version|v)$/ ) { - show_version(); - exit 1; - } - elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { - dump_defaults(@$rdefaults); - exit 1; - } - elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { - dump_long_names(@$roption_string); - exit 1; - } - elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { - dump_short_names($rexpansion); - exit 1; - } - elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { - Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); - exit 1; - } - } - - if ( $saw_dump_profile && $saw_ignore_profile ) { - warn "No profile to dump because of -npro\n"; - exit 1; - } - - #--------------------------------------------------------------- - # read any .perltidyrc configuration file - #--------------------------------------------------------------- - unless ($saw_ignore_profile) { - - # resolve possible conflict between $perltidyrc_stream passed - # as call parameter to perltidy and -pro=filename on command - # line. - if ($perltidyrc_stream) { - if ($config_file) { - warn <{'vertical-tightness'} ) { - my $vt = $rOpts->{'vertical-tightness'}; - $rOpts->{'paren-vertical-tightness'} = $vt; - $rOpts->{'square-bracket-vertical-tightness'} = $vt; - $rOpts->{'brace-vertical-tightness'} = $vt; - } - - if ( defined $rOpts->{'vertical-tightness-closing'} ) { - my $vtc = $rOpts->{'vertical-tightness-closing'}; - $rOpts->{'paren-vertical-tightness-closing'} = $vtc; - $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; - $rOpts->{'brace-vertical-tightness-closing'} = $vtc; - } - - if ( defined $rOpts->{'closing-token-indentation'} ) { - my $cti = $rOpts->{'closing-token-indentation'}; - $rOpts->{'closing-square-bracket-indentation'} = $cti; - $rOpts->{'closing-brace-indentation'} = $cti; - $rOpts->{'closing-paren-indentation'} = $cti; - } - - # In quiet mode, there is no log file and hence no way to report - # results of syntax check, so don't do it. - if ( $rOpts->{'quiet'} ) { - $rOpts->{'check-syntax'} = 0; - } - - # can't check syntax if no output - if ( $rOpts->{'format'} ne 'tidy' ) { - $rOpts->{'check-syntax'} = 0; - } - - # Never let Windows 9x/Me systems run syntax check -- this will prevent a - # wide variety of nasty problems on these systems, because they cannot - # reliably run backticks. Don't even think about changing this! - if ( $rOpts->{'check-syntax'} - && $is_Windows - && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) - { - $rOpts->{'check-syntax'} = 0; - } - - # It's really a bad idea to check syntax as root unless you wrote - # the script yourself. FIXME: not sure if this works with VMS - unless ($is_Windows) { - - if ( $< == 0 && $rOpts->{'check-syntax'} ) { - $rOpts->{'check-syntax'} = 0; - $$rpending_complaint .= -"Syntax check deactivated for safety; you shouldn't run this as root\n"; - } - } - - # see if user set a non-negative logfile-gap - if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { - - # a zero gap will be taken as a 1 - if ( $rOpts->{'logfile-gap'} == 0 ) { - $rOpts->{'logfile-gap'} = 1; - } - - # setting a non-negative logfile gap causes logfile to be saved - $rOpts->{'logfile'} = 1; - } - - # not setting logfile gap, or setting it negative, causes default of 50 - else { - $rOpts->{'logfile-gap'} = 50; - } - - # set short-cut flag when only indentation is to be done. - # Note that the user may or may not have already set the - # indent-only flag. - if ( !$rOpts->{'add-whitespace'} - && !$rOpts->{'delete-old-whitespace'} - && !$rOpts->{'add-newlines'} - && !$rOpts->{'delete-old-newlines'} ) - { - $rOpts->{'indent-only'} = 1; - } - - # -isbc implies -ibc - if ( $rOpts->{'indent-spaced-block-comments'} ) { - $rOpts->{'indent-block-comments'} = 1; - } - - # -bli flag implies -bl - if ( $rOpts->{'brace-left-and-indent'} ) { - $rOpts->{'opening-brace-on-new-line'} = 1; - } - - if ( $rOpts->{'opening-brace-always-on-right'} - && $rOpts->{'opening-brace-on-new-line'} ) - { - warn <{'opening-brace-on-new-line'} = 0; - } - - # it simplifies things if -bl is 0 rather than undefined - if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { - $rOpts->{'opening-brace-on-new-line'} = 0; - } - - # -sbl defaults to -bl if not defined - if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { - $rOpts->{'opening-sub-brace-on-new-line'} = - $rOpts->{'opening-brace-on-new-line'}; - } - - # set shortcut flag if no blanks to be written - unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) { - $rOpts->{'swallow-optional-blank-lines'} = 1; - } - - if ( $rOpts->{'entab-leading-whitespace'} ) { - if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { - warn "-et=n must use a positive integer; ignoring -et\n"; - $rOpts->{'entab-leading-whitespace'} = undef; - } - - # entab leading whitespace has priority over the older 'tabs' option - if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } - } -} - -sub expand_command_abbreviations { - - # go through @ARGV and expand any abbreviations - - my ( $rexpansion, $rraw_options, $config_file ) = @_; - my ($word); - - # set a pass limit to prevent an infinite loop; - # 10 should be plenty, but it may be increased to allow deeply - # nested expansions. - my $max_passes = 10; - my @new_argv = (); - - # keep looping until all expansions have been converted into actual - # dash parameters.. - for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { - my @new_argv = (); - my $abbrev_count = 0; - - # loop over each item in @ARGV.. - foreach $word (@ARGV) { - - # convert any leading 'no-' to just 'no' - if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } - - # if it is a dash flag (instead of a file name).. - if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { - - my $abr = $1; - my $flags = $2; - - # save the raw input for debug output in case of circular refs - if ( $pass_count == 0 ) { - push( @$rraw_options, $word ); - } - - # recombine abbreviation and flag, if necessary, - # to allow abbreviations with arguments such as '-vt=1' - if ( $rexpansion->{ $abr . $flags } ) { - $abr = $abr . $flags; - $flags = ""; - } - - # if we see this dash item in the expansion hash.. - if ( $rexpansion->{$abr} ) { - $abbrev_count++; - - # stuff all of the words that it expands to into the - # new arg list for the next pass - foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { - next unless $abbrev; # for safety; shouldn't happen - push( @new_argv, '--' . $abbrev . $flags ); - } - } - - # not in expansion hash, must be actual long name - else { - push( @new_argv, $word ); - } - } - - # not a dash item, so just save it for the next pass - else { - push( @new_argv, $word ); - } - } # end of this pass - - # update parameter list @ARGV to the new one - @ARGV = @new_argv; - last unless ( $abbrev_count > 0 ); - - # make sure we are not in an infinite loop - if ( $pass_count == $max_passes ) { - print STDERR -"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n"; - print STDERR "Here are the raw options\n"; - local $" = ')('; - print STDERR "(@$rraw_options)\n"; - my $num = @new_argv; - - if ( $num < 50 ) { - print STDERR "After $max_passes passes here is ARGV\n"; - print STDERR "(@new_argv)\n"; - } - else { - print STDERR "After $max_passes passes ARGV has $num entries\n"; - } - - if ($config_file) { - die <<"DIE"; -Please check your configuration file $config_file for circular-references. -To deactivate it, use -npro. -DIE - } - else { - die <<'DIE'; -Program bug - circular-references in the %expansion hash, probably due to -a recent program change. -DIE - } - } # end of check for circular references - } # end of loop over all passes -} - -# Debug routine -- this will dump the expansion hash -sub dump_short_names { - my $rexpansion = shift; - print STDOUT < @list\n"; - } -} - -sub check_vms_filename { - - # given a valid filename (the perltidy input file) - # create a modified filename and separator character - # suitable for VMS. - # - # Contributed by Michael Cartmell - # - my ( $base, $path ) = fileparse( $_[0] ); - - # remove explicit ; version - $base =~ s/;-?\d*$// - - # remove explicit . version ie two dots in filename NB ^ escapes a dot - or $base =~ s/( # begin capture $1 - (?:^|[^^])\. # match a dot not preceded by a caret - (?: # followed by nothing - | # or - .*[^^] # anything ending in a non caret - ) - ) # end capture $1 - \.-?\d*$ # match . version number - /$1/x; - - # normalise filename, if there are no unescaped dots then append one - $base .= '.' unless $base =~ /(?:^|[^^])\./; - - # if we don't already have an extension then we just append the extention - my $separator = ( $base =~ /\.$/ ) ? "" : "_"; - return ( $path . $base, $separator ); -} - -sub Win_OS_Type { - - # TODO: are these more standard names? - # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 - - # Returns a string that determines what MS OS we are on. - # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 - # Returns blank string if not an MS system. - # Original code contributed by: Yves Orton - # We need to know this to decide where to look for config files - - my $rpending_complaint = shift; - my $os = ""; - return $os unless $^O =~ /win32|dos/i; # is it a MS box? - - # Systems built from Perl source may not have Win32.pm - # But probably have Win32::GetOSVersion() anyway so the - # following line is not 'required': - # return $os unless eval('require Win32'); - - # Use the standard API call to determine the version - my ( $undef, $major, $minor, $build, $id ); - eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; - - # - # NAME ID MAJOR MINOR - # Windows NT 4 2 4 0 - # Windows 2000 2 5 0 - # Windows XP 2 5 1 - # Windows Server 2003 2 5 2 - - return "win32s" unless $id; # If id==0 then its a win32s box. - $os = { # Magic numbers from MSDN - # documentation of GetOSVersion - 1 => { - 0 => "95", - 10 => "98", - 90 => "Me" - }, - 2 => { - 0 => "2000", # or NT 4, see below - 1 => "XP/.Net", - 2 => "Win2003", - 51 => "NT3.51" - } - }->{$id}->{$minor}; - - # If $os is undefined, the above code is out of date. Suggested updates - # are welcome. - unless ( defined $os ) { - $os = ""; - $$rpending_complaint .= <($config_file); - - # Default environment vars. - my @envs = qw(PERLTIDY HOME); - - # Check the NT/2k/XP locations, first a local machine def, then a - # network def - push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; - - # Now go through the enviornment ... - foreach my $var (@envs) { - $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; - if ( defined( $ENV{$var} ) ) { - $$rconfig_file_chatter .= " = $ENV{$var}\n"; - - # test ENV{ PERLTIDY } as file: - if ( $var eq 'PERLTIDY' ) { - $config_file = "$ENV{$var}"; - return $config_file if $exists_config_file->($config_file); - } - - # test ENV as directory: - $config_file = catfile( $ENV{$var}, ".perltidyrc" ); - return $config_file if $exists_config_file->($config_file); - } - else { - $$rconfig_file_chatter .= "\n"; - } - } - - # then look for a system-wide definition - # where to look varies with OS - if ($is_Windows) { - - if ($Windows_type) { - my ( $os, $system, $allusers ) = - Win_Config_Locs( $rpending_complaint, $Windows_type ); - - # Check All Users directory, if there is one. - if ($allusers) { - $config_file = catfile( $allusers, ".perltidyrc" ); - return $config_file if $exists_config_file->($config_file); - } - - # Check system directory. - $config_file = catfile( $system, ".perltidyrc" ); - return $config_file if $exists_config_file->($config_file); - } - } - - # Place to add customization code for other systems - elsif ( $^O eq 'OS2' ) { - } - elsif ( $^O eq 'MacOS' ) { - } - elsif ( $^O eq 'VMS' ) { - } - - # Assume some kind of Unix - else { - - $config_file = "/usr/local/etc/perltidyrc"; - return $config_file if $exists_config_file->($config_file); - - $config_file = "/etc/perltidyrc"; - return $config_file if $exists_config_file->($config_file); - } - - # Couldn't find a config file - return; -} - -sub Win_Config_Locs { - - # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), - # or undef if its not a win32 OS. In list context returns OS, System - # Directory, and All Users Directory. All Users will be empty on a - # 9x/Me box. Contributed by: Yves Orton. - - my $rpending_complaint = shift; - my $os = (@_) ? shift : Win_OS_Type(); - return unless $os; - - my $system = ""; - my $allusers = ""; - - if ( $os =~ /9[58]|Me/ ) { - $system = "C:/Windows"; - } - elsif ( $os =~ /NT|XP|200?/ ) { - $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; - $allusers = - ( $os =~ /NT/ ) - ? "C:/WinNT/profiles/All Users/" - : "C:/Documents and Settings/All Users/"; - } - else { - - # This currently would only happen on a win32s computer. I dont have - # one to test, so I am unsure how to proceed. Suggestions welcome! - $$rpending_complaint .= -"I dont know a sensible place to look for config files on an $os system.\n"; - return; - } - return wantarray ? ( $os, $system, $allusers ) : $os; -} - -sub dump_config_file { - my $fh = shift; - my $config_file = shift; - my $rconfig_file_chatter = shift; - print STDOUT "$$rconfig_file_chatter"; - if ($fh) { - print STDOUT "# Dump of file: '$config_file'\n"; - while ( my $line = $fh->getline() ) { print STDOUT $line } - eval { $fh->close() }; - } - else { - print STDOUT "# ...no config file found\n"; - } -} - -sub read_config_file { - - my ( $fh, $config_file, $rexpansion ) = @_; - my @config_list = (); - - # file is bad if non-empty $death_message is returned - my $death_message = ""; - - my $name = undef; - my $line_no; - while ( my $line = $fh->getline() ) { - $line_no++; - chomp $line; - next if $line =~ /^\s*#/; # skip full-line comment - ( $line, $death_message ) = - strip_comment( $line, $config_file, $line_no ); - last if ($death_message); - $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends - next unless $line; - - # look for something of the general form - # newname { body } - # or just - # body - - if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) { - my ( $newname, $body, $curly ) = ( $2, $3, $4 ); - - # handle a new alias definition - if ($newname) { - if ($name) { - $death_message = -"No '}' seen after $name and before $newname in config file $config_file line $.\n"; - last; - } - $name = $newname; - - if ( ${$rexpansion}{$name} ) { - local $" = ')('; - my @names = sort keys %$rexpansion; - $death_message = - "Here is a list of all installed aliases\n(@names)\n" - . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; - last; - } - ${$rexpansion}{$name} = []; - } - - # now do the body - if ($body) { - - my ( $rbody_parts, $msg ) = parse_args($body); - if ($msg) { - $death_message = <close() }; - return ( \@config_list, $death_message ); -} - -sub strip_comment { - - my ( $instr, $config_file, $line_no ) = @_; - my $msg = ""; - - # nothing to do if no comments - if ( $instr !~ /#/ ) { - return ( $instr, $msg ); - } - - # use simple method of no quotes - elsif ( $instr !~ /['"]/ ) { - $instr =~ s/\s*\#.*$//; # simple trim - return ( $instr, $msg ); - } - - # handle comments and quotes - my $outstr = ""; - my $quote_char = ""; - while (1) { - - # looking for ending quote character - if ($quote_char) { - if ( $instr =~ /\G($quote_char)/gc ) { - $quote_char = ""; - $outstr .= $1; - } - elsif ( $instr =~ /\G(.)/gc ) { - $outstr .= $1; - } - - # error..we reached the end without seeing the ending quote char - else { - $msg = < in this text: -$instr -Please fix this line or use -npro to avoid reading this file -EOM - last; - } - } - - # accumulating characters and looking for start of a quoted string - else { - if ( $instr =~ /\G([\"\'])/gc ) { - $outstr .= $1; - $quote_char = $1; - } - elsif ( $instr =~ /\G#/gc ) { - last; - } - elsif ( $instr =~ /\G(.)/gc ) { - $outstr .= $1; - } - else { - last; - } - } - } - return ( $outstr, $msg ); -} - -sub parse_args { - - # Parse a command string containing multiple string with possible - # quotes, into individual commands. It might look like this, for example: - # - # -wba=" + - " -some-thing -wbb='. && ||' - # - # There is no need, at present, to handle escaped quote characters. - # (They are not perltidy tokens, so needn't be in strings). - - my ($body) = @_; - my @body_parts = (); - my $quote_char = ""; - my $part = ""; - my $msg = ""; - while (1) { - - # looking for ending quote character - if ($quote_char) { - if ( $body =~ /\G($quote_char)/gc ) { - $quote_char = ""; - } - elsif ( $body =~ /\G(.)/gc ) { - $part .= $1; - } - - # error..we reached the end without seeing the ending quote char - else { - if ( length($part) ) { push @body_parts, $part; } - $msg = < in this text: -$body -EOM - last; - } - } - - # accumulating characters and looking for start of a quoted string - else { - if ( $body =~ /\G([\"\'])/gc ) { - $quote_char = $1; - } - elsif ( $body =~ /\G(\s+)/gc ) { - if ( length($part) ) { push @body_parts, $part; } - $part = ""; - } - elsif ( $body =~ /\G(.)/gc ) { - $part .= $1; - } - else { - if ( length($part) ) { push @body_parts, $part; } - last; - } - } - } - return ( \@body_parts, $msg ); -} - -sub dump_long_names { - - my @names = sort @_; - print STDOUT < does not take an argument -# =s takes a mandatory string -# :s takes an optional string -# =i takes a mandatory integer -# :i takes an optional integer -# ! does not take an argument and may be negated -# i.e., -foo and -nofoo are allowed -# a double dash signals the end of the options list -# -#--------------------------------------------------------------- -EOM - - foreach (@names) { print STDOUT "$_\n" } -} - -sub dump_defaults { - my @defaults = sort @_; - print STDOUT "Default command line options:\n"; - foreach (@_) { print STDOUT "$_\n" } -} - -sub readable_options { - - # return options for this run as a string which could be - # put in a perltidyrc file - my ( $rOpts, $roption_string ) = @_; - my %Getopt_flags; - my $rGetopt_flags = \%Getopt_flags; - my $readable_options = "# Final parameter set for this run.\n"; - $readable_options .= - "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; - foreach my $opt ( @{$roption_string} ) { - my $flag = ""; - if ( $opt =~ /(.*)(!|=.*)$/ ) { - $opt = $1; - $flag = $2; - } - if ( defined( $rOpts->{$opt} ) ) { - $rGetopt_flags->{$opt} = $flag; - } - } - foreach my $key ( sort keys %{$rOpts} ) { - my $flag = $rGetopt_flags->{$key}; - my $value = $rOpts->{$key}; - my $prefix = '--'; - my $suffix = ""; - if ($flag) { - if ( $flag =~ /^=/ ) { - if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } - $suffix = "=" . $value; - } - elsif ( $flag =~ /^!/ ) { - $prefix .= "no" unless ($value); - } - else { - - # shouldn't happen - $readable_options .= - "# ERROR in dump_options: unrecognized flag $flag for $key\n"; - } - } - $readable_options .= $prefix . $key . $suffix . "\n"; - } - return $readable_options; -} - -sub show_version { - print <<"EOM"; -This is perltidy, v$VERSION - -Copyright 2000-2007, Steve Hancock - -Perltidy is free software and may be copied under the terms of the GNU -General Public License, which is included in the distribution files. - -Complete documentation for perltidy can be found using 'man perltidy' -or on the internet at http://perltidy.sourceforge.net. -EOM -} - -sub usage { - - print STDOUT <outfile - perltidy [ options ] outfile - -Options have short and long forms. Short forms are shown; see -man pages for long forms. Note: '=s' indicates a required string, -and '=n' indicates a required integer. - -I/O control - -h show this help - -o=file name of the output file (only if single input file) - -oext=s change output extension from 'tdy' to s - -opath=path change path to be 'path' for output files - -b backup original to .bak and modify file in-place - -bext=s change default backup extension from 'bak' to s - -q deactivate error messages (for running under editor) - -w include non-critical warning messages in the .ERR error output - -syn run perl -c to check syntax (default under unix systems) - -log save .LOG file, which has useful diagnostics - -f force perltidy to read a binary file - -g like -log but writes more detailed .LOG file, for debugging scripts - -opt write the set of options actually used to a .LOG file - -npro ignore .perltidyrc configuration command file - -pro=file read configuration commands from file instead of .perltidyrc - -st send output to standard output, STDOUT - -se send error output to standard error output, STDERR - -v display version number to standard output and quit - -Basic Options: - -i=n use n columns per indentation level (default n=4) - -t tabs: use one tab character per indentation level, not recommeded - -nt no tabs: use n spaces per indentation level (default) - -et=n entab leading whitespace n spaces per tab; not recommended - -io "indent only": just do indentation, no other formatting. - -sil=n set starting indentation level to n; use if auto detection fails - -ole=s specify output line ending (s=dos or win, mac, unix) - -ple keep output line endings same as input (input must be filename) - -Whitespace Control - -fws freeze whitespace; this disables all whitespace changes - and disables the following switches: - -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) - -bbt same as -bt but for code block braces; same as -bt if not given - -bbvt block braces vertically tight; use with -bl or -bli - -bbvtl=s make -bbvt to apply to selected list of block types - -pt=n paren tightness (n=0, 1 or 2) - -sbt=n square bracket tightness (n=0, 1, or 2) - -bvt=n brace vertical tightness, - n=(0=open, 1=close unless multiple steps on a line, 2=always close) - -pvt=n paren vertical tightness (see -bvt for n) - -sbvt=n square bracket vertical tightness (see -bvt for n) - -bvtc=n closing brace vertical tightness: - n=(0=open, 1=sometimes close, 2=always close) - -pvtc=n closing paren vertical tightness, see -bvtc for n. - -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. - -ci=n sets continuation indentation=n, default is n=2 spaces - -lp line up parentheses, brackets, and non-BLOCK braces - -sfs add space before semicolon in for( ; ; ) - -aws allow perltidy to add whitespace (default) - -dws delete all old non-essential whitespace - -icb indent closing brace of a code block - -cti=n closing indentation of paren, square bracket, or non-block brace: - n=0 none, =1 align with opening, =2 one full indentation level - -icp equivalent to -cti=2 - -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' - -wrs=s want space right of tokens in string; - -sts put space before terminal semicolon of a statement - -sak=s put space between keywords given in s and '('; - -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' - -Line Break Control - -fnl freeze newlines; this disables all line break changes - and disables the following switches: - -anl add newlines; ok to introduce new line breaks - -bbs add blank line before subs and packages - -bbc add blank line before block comments - -bbb add blank line between major blocks - -sob swallow optional blank lines - -ce cuddled else; use this style: '} else {' - -dnl delete old newlines (default) - -mbl=n maximum consecutive blank lines (default=1) - -l=n maximum line length; default n=80 - -bl opening brace on new line - -sbl opening sub brace on new line. value of -bl is used if not given. - -bli opening brace on new line and indented - -bar opening brace always on right, even for long clauses - -vt=n vertical tightness (requires -lp); n controls break after opening - token: 0=never 1=no break if next line balanced 2=no break - -vtc=n vertical tightness of closing container; n controls if closing - token starts new line: 0=always 1=not unless list 1=never - -wba=s want break after tokens in string; i.e. wba=': .' - -wbb=s want break before tokens in string - -Following Old Breakpoints - -kis keep interior semicolons. Allows multiple statements per line. - -boc break at old comma breaks: turns off all automatic list formatting - -bol break at old logical breakpoints: or, and, ||, && (default) - -bok break at old list keyword breakpoints such as map, sort (default) - -bot break at old conditional (ternary ?:) operator breakpoints (default) - -cab=n break at commas after a comma-arrow (=>): - n=0 break at all commas after => - n=1 stable: break unless this breaks an existing one-line container - n=2 break only if a one-line container cannot be formed - n=3 do not treat commas after => specially at all - -Comment controls - -ibc indent block comments (default) - -isbc indent spaced block comments; may indent unless no leading space - -msc=n minimum desired spaces to side comment, default 4 - -fpsc=n fix position for side comments; default 0; - -csc add or update closing side comments after closing BLOCK brace - -dcsc delete closing side comments created by a -csc command - -cscp=s change closing side comment prefix to be other than '## end' - -cscl=s change closing side comment to apply to selected list of blocks - -csci=n minimum number of lines needed to apply a -csc tag, default n=6 - -csct=n maximum number of columns of appended text, default n=20 - -cscw causes warning if old side comment is overwritten with -csc - - -sbc use 'static block comments' identified by leading '##' (default) - -sbcp=s change static block comment identifier to be other than '##' - -osbc outdent static block comments - - -ssc use 'static side comments' identified by leading '##' (default) - -sscp=s change static side comment identifier to be other than '##' - -Delete selected text - -dac delete all comments AND pod - -dbc delete block comments - -dsc delete side comments - -dp delete pod - -Send selected text to a '.TEE' file - -tac tee all comments AND pod - -tbc tee block comments - -tsc tee side comments - -tp tee pod - -Outdenting - -olq outdent long quoted strings (default) - -olc outdent a long block comment line - -ola outdent statement labels - -okw outdent control keywords (redo, next, last, goto, return) - -okwl=s specify alternative keywords for -okw command - -Other controls - -mft=n maximum fields per table; default n=40 - -x do not format lines before hash-bang line (i.e., for VMS) - -asc allows perltidy to add a ';' when missing (default) - -dsm allows perltidy to delete an unnecessary ';' (default) - -Combinations of other parameters - -gnu attempt to follow GNU Coding Standards as applied to perl - -mangle remove as many newlines as possible (but keep comments and pods) - -extrude insert as many newlines as possible - -Dump and die, debugging - -dop dump options used in this run to standard output and quit - -ddf dump default options to standard output and quit - -dsn dump all option short names to standard output and quit - -dln dump option long names to standard output and quit - -dpro dump whatever configuration file is in effect to standard output - -dtt dump all token types to standard output and quit - -HTML - -html write an html file (see 'man perl2web' for many options) - Note: when -html is used, no indentation or formatting are done. - Hint: try perltidy -html -css=mystyle.css filename.pl - and edit mystyle.css to change the appearance of filename.html. - -nnn gives line numbers - -pre only writes out
..
code section - -toc places a table of contents to subs at the top (default) - -pod passes pod text through pod2html (default) - -frm write html as a frame (3 files) - -text=s extra extension for table of contents if -frm, default='toc' - -sext=s extra extension for file content if -frm, default='src' - -A prefix of "n" negates short form toggle switches, and a prefix of "no" -negates the long forms. For example, -nasc means don't add missing -semicolons. - -If you are unable to see this entire text, try "perltidy -h | more" -For more detailed information, and additional options, try "man perltidy", -or go to the perltidy home page at http://perltidy.sourceforge.net -EOF - -} - -sub process_this_file { - - my ( $truth, $beauty ) = @_; - - # loop to process each line of this file - while ( my $line_of_tokens = $truth->get_line() ) { - $beauty->write_line($line_of_tokens); - } - - # finish up - eval { $beauty->finish_formatting() }; - $truth->report_tokenization_errors(); -} - -sub check_syntax { - - # Use 'perl -c' to make sure that we did not create bad syntax - # This is a very good independent check for programming errors - # - # Given names of the input and output files, ($ifname, $ofname), - # we do the following: - # - check syntax of the input file - # - if bad, all done (could be an incomplete code snippet) - # - if infile syntax ok, then check syntax of the output file; - # - if outfile syntax bad, issue warning; this implies a code bug! - # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good - - my ( $ifname, $ofname, $logger_object, $rOpts ) = @_; - my $infile_syntax_ok = 0; - my $line_of_dashes = '-' x 42 . "\n"; - - my $flags = $rOpts->{'perl-syntax-check-flags'}; - - # be sure we invoke perl with -c - # note: perl will accept repeated flags like '-c -c'. It is safest - # to append another -c than try to find an interior bundled c, as - # in -Tc, because such a 'c' might be in a quoted string, for example. - if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } - - # be sure we invoke perl with -x if requested - # same comments about repeated parameters applies - if ( $rOpts->{'look-for-hash-bang'} ) { - if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } - } - - # this shouldn't happen unless a termporary file couldn't be made - if ( $ifname eq '-' ) { - $logger_object->write_logfile_entry( - "Cannot run perl -c on STDIN and STDOUT\n"); - return $infile_syntax_ok; - } - - $logger_object->write_logfile_entry( - "checking input file syntax with perl $flags\n"); - $logger_object->write_logfile_entry($line_of_dashes); - - # Not all operating systems/shells support redirection of the standard - # error output. - my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; - - my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection ); - $logger_object->write_logfile_entry("$perl_output\n"); - - if ( $perl_output =~ /syntax\s*OK/ ) { - $infile_syntax_ok = 1; - $logger_object->write_logfile_entry($line_of_dashes); - $logger_object->write_logfile_entry( - "checking output file syntax with perl $flags ...\n"); - $logger_object->write_logfile_entry($line_of_dashes); - - my $perl_output = - do_syntax_check( $ofname, $flags, $error_redirection ); - $logger_object->write_logfile_entry("$perl_output\n"); - - unless ( $perl_output =~ /syntax\s*OK/ ) { - $logger_object->write_logfile_entry($line_of_dashes); - $logger_object->warning( -"The output file has a syntax error when tested with perl $flags $ofname !\n" - ); - $logger_object->warning( - "This implies an error in perltidy; the file $ofname is bad\n"); - $logger_object->report_definite_bug(); - - # the perl version number will be helpful for diagnosing the problem - $logger_object->write_logfile_entry( - qx/perl -v $error_redirection/ . "\n" ); - } - } - else { - - # Only warn of perl -c syntax errors. Other messages, - # such as missing modules, are too common. They can be - # seen by running with perltidy -w - $logger_object->complain("A syntax check using perl $flags gives: \n"); - $logger_object->complain($line_of_dashes); - $logger_object->complain("$perl_output\n"); - $logger_object->complain($line_of_dashes); - $infile_syntax_ok = -1; - $logger_object->write_logfile_entry($line_of_dashes); - $logger_object->write_logfile_entry( -"The output file will not be checked because of input file problems\n" - ); - } - return $infile_syntax_ok; -} - -sub do_syntax_check { - my ( $fname, $flags, $error_redirection ) = @_; - - # We have to quote the filename in case it has unusual characters - # or spaces. Example: this filename #CM11.pm# gives trouble. - $fname = '"' . $fname . '"'; - - # Under VMS something like -T will become -t (and an error) so we - # will put quotes around the flags. Double quotes seem to work on - # Unix/Windows/VMS, but this may not work on all systems. (Single - # quotes do not work under Windows). It could become necessary to - # put double quotes around each flag, such as: -"c" -"T" - # We may eventually need some system-dependent coding here. - $flags = '"' . $flags . '"'; - - # now wish for luck... - return qx/perl $flags $fname $error_redirection/; -} - -##################################################################### -# -# This is a stripped down version of IO::Scalar -# Given a reference to a scalar, it supplies either: -# a getline method which reads lines (mode='r'), or -# a print method which reads lines (mode='w') -# -##################################################################### -package Perl::Tidy::IOScalar; -use Carp; - -sub new { - my ( $package, $rscalar, $mode ) = @_; - my $ref = ref $rscalar; - if ( $ref ne 'SCALAR' ) { - confess <[1]; - if ( $mode ne 'r' ) { - confess <[2]++; - ##my $line = $self->[0]->[$i]; - return $self->[0]->[$i]; -} - -sub print { - my $self = shift; - my $mode = $self->[1]; - if ( $mode ne 'w' ) { - confess <[0] } .= $_[0]; -} -sub close { return } - -##################################################################### -# -# This is a stripped down version of IO::ScalarArray -# Given a reference to an array, it supplies either: -# a getline method which reads lines (mode='r'), or -# a print method which reads lines (mode='w') -# -# NOTE: this routine assumes that that there aren't any embedded -# newlines within any of the array elements. There are no checks -# for that. -# -##################################################################### -package Perl::Tidy::IOScalarArray; -use Carp; - -sub new { - my ( $package, $rarray, $mode ) = @_; - my $ref = ref $rarray; - if ( $ref ne 'ARRAY' ) { - confess <[1]; - if ( $mode ne 'r' ) { - confess <[2]++; - return $self->[0]->[$i]; -} - -sub print { - my $self = shift; - my $mode = $self->[1]; - if ( $mode ne 'w' ) { - confess <[0] }, $_[0]; -} -sub close { return } - -##################################################################### -# -# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method -# which returns the next line to be parsed -# -##################################################################### - -package Perl::Tidy::LineSource; - -sub new { - - my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; - my $input_file_copy = undef; - my $fh_copy; - - my $input_line_ending; - if ( $rOpts->{'preserve-line-endings'} ) { - $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); - } - - ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); - return undef unless $fh; - - # in order to check output syntax when standard output is used, - # or when it is an object, we have to make a copy of the file - if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) - { - - # Turning off syntax check when input output is used. - # The reason is that temporary files cause problems on - # on many systems. - $rOpts->{'check-syntax'} = 0; - $input_file_copy = '-'; - - $$rpending_logfile_message .= < $fh, - _fh_copy => $fh_copy, - _filename => $input_file, - _input_file_copy => $input_file_copy, - _input_line_ending => $input_line_ending, - _rinput_buffer => [], - _started => 0, - }, $class; -} - -sub get_input_file_copy_name { - my $self = shift; - my $ifname = $self->{_input_file_copy}; - unless ($ifname) { - $ifname = $self->{_filename}; - } - return $ifname; -} - -sub close_input_file { - my $self = shift; - eval { $self->{_fh}->close() }; - eval { $self->{_fh_copy}->close() } if $self->{_fh_copy}; -} - -sub get_line { - my $self = shift; - my $line = undef; - my $fh = $self->{_fh}; - my $fh_copy = $self->{_fh_copy}; - my $rinput_buffer = $self->{_rinput_buffer}; - - if ( scalar(@$rinput_buffer) ) { - $line = shift @$rinput_buffer; - } - else { - $line = $fh->getline(); - - # patch to read raw mac files under unix, dos - # see if the first line has embedded \r's - if ( $line && !$self->{_started} ) { - if ( $line =~ /[\015][^\015\012]/ ) { - - # found one -- break the line up and store in a buffer - @$rinput_buffer = map { $_ . "\n" } split /\015/, $line; - my $count = @$rinput_buffer; - $line = shift @$rinput_buffer; - } - $self->{_started}++; - } - } - if ( $line && $fh_copy ) { $fh_copy->print($line); } - return $line; -} - -##################################################################### -# -# the Perl::Tidy::LineSink class supplies a write_line method for -# actual file writing -# -##################################################################### - -package Perl::Tidy::LineSink; - -sub new { - - my ( $class, $output_file, $tee_file, $line_separator, $rOpts, - $rpending_logfile_message, $binmode ) - = @_; - my $fh = undef; - my $fh_copy = undef; - my $fh_tee = undef; - my $output_file_copy = ""; - my $output_file_open = 0; - - if ( $rOpts->{'format'} eq 'tidy' ) { - ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); - unless ($fh) { die "Cannot write to output stream\n"; } - $output_file_open = 1; - if ($binmode) { - if ( ref($fh) eq 'IO::File' ) { - binmode $fh; - } - if ( $output_file eq '-' ) { binmode STDOUT } - } - } - - # in order to check output syntax when standard output is used, - # or when it is an object, we have to make a copy of the file - if ( $output_file eq '-' || ref $output_file ) { - if ( $rOpts->{'check-syntax'} ) { - - # Turning off syntax check when standard output is used. - # The reason is that temporary files cause problems on - # on many systems. - $rOpts->{'check-syntax'} = 0; - $output_file_copy = '-'; - $$rpending_logfile_message .= < $fh, - _fh_copy => $fh_copy, - _fh_tee => $fh_tee, - _output_file => $output_file, - _output_file_open => $output_file_open, - _output_file_copy => $output_file_copy, - _tee_flag => 0, - _tee_file => $tee_file, - _tee_file_opened => 0, - _line_separator => $line_separator, - _binmode => $binmode, - }, $class; -} - -sub write_line { - - my $self = shift; - my $fh = $self->{_fh}; - my $fh_copy = $self->{_fh_copy}; - - my $output_file_open = $self->{_output_file_open}; - chomp $_[0]; - $_[0] .= $self->{_line_separator}; - - $fh->print( $_[0] ) if ( $self->{_output_file_open} ); - print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} ); - - if ( $self->{_tee_flag} ) { - unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } - my $fh_tee = $self->{_fh_tee}; - print $fh_tee $_[0]; - } -} - -sub get_output_file_copy { - my $self = shift; - my $ofname = $self->{_output_file_copy}; - unless ($ofname) { - $ofname = $self->{_output_file}; - } - return $ofname; -} - -sub tee_on { - my $self = shift; - $self->{_tee_flag} = 1; -} - -sub tee_off { - my $self = shift; - $self->{_tee_flag} = 0; -} - -sub really_open_tee_file { - my $self = shift; - my $tee_file = $self->{_tee_file}; - my $fh_tee; - $fh_tee = IO::File->new(">$tee_file") - or die("couldn't open TEE file $tee_file: $!\n"); - binmode $fh_tee if $self->{_binmode}; - $self->{_tee_file_opened} = 1; - $self->{_fh_tee} = $fh_tee; -} - -sub close_output_file { - my $self = shift; - eval { $self->{_fh}->close() } if $self->{_output_file_open}; - eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} ); - $self->close_tee_file(); -} - -sub close_tee_file { - my $self = shift; - - if ( $self->{_tee_file_opened} ) { - eval { $self->{_fh_tee}->close() }; - $self->{_tee_file_opened} = 0; - } -} - -##################################################################### -# -# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is -# useful for program development. -# -# Only one such file is created regardless of the number of input -# files processed. This allows the results of processing many files -# to be summarized in a single file. -# -##################################################################### - -package Perl::Tidy::Diagnostics; - -sub new { - - my $class = shift; - bless { - _write_diagnostics_count => 0, - _last_diagnostic_file => "", - _input_file => "", - _fh => undef, - }, $class; -} - -sub set_input_file { - my $self = shift; - $self->{_input_file} = $_[0]; -} - -# This is a diagnostic routine which is useful for program development. -# Output from debug messages go to a file named DIAGNOSTICS, where -# they are labeled by file and line. This allows many files to be -# scanned at once for some particular condition of interest. -sub write_diagnostics { - my $self = shift; - - unless ( $self->{_write_diagnostics_count} ) { - open DIAGNOSTICS, ">DIAGNOSTICS" - or death("couldn't open DIAGNOSTICS: $!\n"); - } - - my $last_diagnostic_file = $self->{_last_diagnostic_file}; - my $input_file = $self->{_input_file}; - if ( $last_diagnostic_file ne $input_file ) { - print DIAGNOSTICS "\nFILE:$input_file\n"; - } - $self->{_last_diagnostic_file} = $input_file; - my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); - print DIAGNOSTICS "$input_line_number:\t@_"; - $self->{_write_diagnostics_count}++; -} - -##################################################################### -# -# The Perl::Tidy::Logger class writes the .LOG and .ERR files -# -##################################################################### - -package Perl::Tidy::Logger; - -sub new { - my $class = shift; - my $fh; - my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_; - - # remove any old error output file - unless ( ref($warning_file) ) { - if ( -e $warning_file ) { unlink($warning_file) } - } - - bless { - _log_file => $log_file, - _fh_warnings => undef, - _rOpts => $rOpts, - _fh_warnings => undef, - _last_input_line_written => 0, - _at_end_of_file => 0, - _use_prefix => 1, - _block_log_output => 0, - _line_of_tokens => undef, - _output_line_number => undef, - _wrote_line_information_string => 0, - _wrote_column_headings => 0, - _warning_file => $warning_file, - _warning_count => 0, - _complaint_count => 0, - _saw_code_bug => -1, # -1=no 0=maybe 1=for sure - _saw_brace_error => 0, - _saw_extrude => $saw_extrude, - _output_array => [], - }, $class; -} - -sub close_log_file { - - my $self = shift; - if ( $self->{_fh_warnings} ) { - eval { $self->{_fh_warnings}->close() }; - $self->{_fh_warnings} = undef; - } -} - -sub get_warning_count { - my $self = shift; - return $self->{_warning_count}; -} - -sub get_use_prefix { - my $self = shift; - return $self->{_use_prefix}; -} - -sub block_log_output { - my $self = shift; - $self->{_block_log_output} = 1; -} - -sub unblock_log_output { - my $self = shift; - $self->{_block_log_output} = 0; -} - -sub interrupt_logfile { - my $self = shift; - $self->{_use_prefix} = 0; - $self->warning("\n"); - $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); -} - -sub resume_logfile { - my $self = shift; - $self->write_logfile_entry( '#' x 60 . "\n" ); - $self->{_use_prefix} = 1; -} - -sub we_are_at_the_last_line { - my $self = shift; - unless ( $self->{_wrote_line_information_string} ) { - $self->write_logfile_entry("Last line\n\n"); - } - $self->{_at_end_of_file} = 1; -} - -# record some stuff in case we go down in flames -sub black_box { - my $self = shift; - my ( $line_of_tokens, $output_line_number ) = @_; - my $input_line = $line_of_tokens->{_line_text}; - my $input_line_number = $line_of_tokens->{_line_number}; - - # save line information in case we have to write a logfile message - $self->{_line_of_tokens} = $line_of_tokens; - $self->{_output_line_number} = $output_line_number; - $self->{_wrote_line_information_string} = 0; - - my $last_input_line_written = $self->{_last_input_line_written}; - my $rOpts = $self->{_rOpts}; - if ( - ( - ( $input_line_number - $last_input_line_written ) >= - $rOpts->{'logfile-gap'} - ) - || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) - ) - { - my $rlevels = $line_of_tokens->{_rlevels}; - my $structural_indentation_level = $$rlevels[0]; - $self->{_last_input_line_written} = $input_line_number; - ( my $out_str = $input_line ) =~ s/^\s*//; - chomp $out_str; - - $out_str = ( '.' x $structural_indentation_level ) . $out_str; - - if ( length($out_str) > 35 ) { - $out_str = substr( $out_str, 0, 35 ) . " ...."; - } - $self->logfile_output( "", "$out_str\n" ); - } -} - -sub write_logfile_entry { - my $self = shift; - - # add leading >>> to avoid confusing error mesages and code - $self->logfile_output( ">>>", "@_" ); -} - -sub write_column_headings { - my $self = shift; - - $self->{_wrote_column_headings} = 1; - my $routput_array = $self->{_output_array}; - push @{$routput_array}, <>>) -lines levels i k (code begins with one '.' per indent level) ------- ----- - - -------- ------------------------------------------- -EOM -} - -sub make_line_information_string { - - # make columns of information when a logfile message needs to go out - my $self = shift; - my $line_of_tokens = $self->{_line_of_tokens}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $line_information_string = ""; - if ($input_line_number) { - - my $output_line_number = $self->{_output_line_number}; - my $brace_depth = $line_of_tokens->{_curly_brace_depth}; - my $paren_depth = $line_of_tokens->{_paren_depth}; - my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; - my $python_indentation_level = - $line_of_tokens->{_python_indentation_level}; - my $rlevels = $line_of_tokens->{_rlevels}; - my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; - my $rci_levels = $line_of_tokens->{_rci_levels}; - my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; - - my $structural_indentation_level = $$rlevels[0]; - - $self->write_column_headings() unless $self->{_wrote_column_headings}; - - # keep logfile columns aligned for scripts up to 999 lines; - # for longer scripts it doesn't really matter - my $extra_space = ""; - $extra_space .= - ( $input_line_number < 10 ) ? " " - : ( $input_line_number < 100 ) ? " " - : ""; - $extra_space .= - ( $output_line_number < 10 ) ? " " - : ( $output_line_number < 100 ) ? " " - : ""; - - # there are 2 possible nesting strings: - # the original which looks like this: (0 [1 {2 - # the new one, which looks like this: {{[ - # the new one is easier to read, and shows the order, but - # could be arbitrarily long, so we use it unless it is too long - my $nesting_string = - "($paren_depth [$square_bracket_depth {$brace_depth"; - my $nesting_string_new = $$rnesting_tokens[0]; - - my $ci_level = $$rci_levels[0]; - if ( $ci_level > 9 ) { $ci_level = '*' } - my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; - - if ( length($nesting_string_new) <= 8 ) { - $nesting_string = - $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); - } - if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 } - $line_information_string = -"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; - } - return $line_information_string; -} - -sub logfile_output { - my $self = shift; - my ( $prompt, $msg ) = @_; - return if ( $self->{_block_log_output} ); - - my $routput_array = $self->{_output_array}; - if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { - push @{$routput_array}, "$msg"; - } - else { - my $line_information_string = $self->make_line_information_string(); - $self->{_wrote_line_information_string} = 1; - - if ($line_information_string) { - push @{$routput_array}, "$line_information_string $prompt$msg"; - } - else { - push @{$routput_array}, "$msg"; - } - } -} - -sub get_saw_brace_error { - my $self = shift; - return $self->{_saw_brace_error}; -} - -sub increment_brace_error { - my $self = shift; - $self->{_saw_brace_error}++; -} - -sub brace_warning { - my $self = shift; - use constant BRACE_WARNING_LIMIT => 10; - my $saw_brace_error = $self->{_saw_brace_error}; - - if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { - $self->warning(@_); - } - $saw_brace_error++; - $self->{_saw_brace_error} = $saw_brace_error; - - if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { - $self->warning("No further warnings of this type will be given\n"); - } -} - -sub complain { - - # handle non-critical warning messages based on input flag - my $self = shift; - my $rOpts = $self->{_rOpts}; - - # these appear in .ERR output only if -w flag is used - if ( $rOpts->{'warning-output'} ) { - $self->warning(@_); - } - - # otherwise, they go to the .LOG file - else { - $self->{_complaint_count}++; - $self->write_logfile_entry(@_); - } -} - -sub warning { - - # report errors to .ERR file (or stdout) - my $self = shift; - use constant WARNING_LIMIT => 50; - - my $rOpts = $self->{_rOpts}; - unless ( $rOpts->{'quiet'} ) { - - my $warning_count = $self->{_warning_count}; - unless ($warning_count) { - my $warning_file = $self->{_warning_file}; - my $fh_warnings; - if ( $rOpts->{'standard-error-output'} ) { - $fh_warnings = *STDERR; - } - else { - ( $fh_warnings, my $filename ) = - Perl::Tidy::streamhandle( $warning_file, 'w' ); - $fh_warnings or die("couldn't open $filename $!\n"); - warn "## Please see file $filename\n"; - } - $self->{_fh_warnings} = $fh_warnings; - } - - my $fh_warnings = $self->{_fh_warnings}; - if ( $warning_count < WARNING_LIMIT ) { - if ( $self->get_use_prefix() > 0 ) { - my $input_line_number = - Perl::Tidy::Tokenizer::get_input_line_number(); - $fh_warnings->print("$input_line_number:\t@_"); - $self->write_logfile_entry("WARNING: @_"); - } - else { - $fh_warnings->print(@_); - $self->write_logfile_entry(@_); - } - } - $warning_count++; - $self->{_warning_count} = $warning_count; - - if ( $warning_count == WARNING_LIMIT ) { - $fh_warnings->print("No further warnings will be given\n"); - } - } -} - -# programming bug codes: -# -1 = no bug -# 0 = maybe, not sure. -# 1 = definitely -sub report_possible_bug { - my $self = shift; - my $saw_code_bug = $self->{_saw_code_bug}; - $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; -} - -sub report_definite_bug { - my $self = shift; - $self->{_saw_code_bug} = 1; -} - -sub ask_user_for_bug_report { - my $self = shift; - - my ( $infile_syntax_ok, $formatter ) = @_; - my $saw_code_bug = $self->{_saw_code_bug}; - if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { - $self->warning(<{_saw_extrude} ) { - $self->warning(<warning(<get_added_semicolon_count(); - }; - if ( $added_semicolon_count > 0 ) { - $self->warning(<{_rOpts}; - my $warning_count = $self->{_warning_count}; - my $saw_code_bug = $self->{_saw_code_bug}; - - my $save_logfile = - ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) - || $saw_code_bug == 1 - || $rOpts->{'logfile'}; - my $log_file = $self->{_log_file}; - if ($warning_count) { - if ($save_logfile) { - $self->block_log_output(); # avoid echoing this to the logfile - $self->warning( - "The logfile $log_file may contain useful information\n"); - $self->unblock_log_output(); - } - - if ( $self->{_complaint_count} > 0 ) { - $self->warning( -"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" - ); - } - - if ( $self->{_saw_brace_error} - && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) - { - $self->warning("To save a full .LOG file rerun with -g\n"); - } - } - $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); - - if ($save_logfile) { - my $log_file = $self->{_log_file}; - my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); - if ($fh) { - my $routput_array = $self->{_output_array}; - foreach ( @{$routput_array} ) { $fh->print($_) } - eval { $fh->close() }; - } - } -} - -##################################################################### -# -# The Perl::Tidy::DevNull class supplies a dummy print method -# -##################################################################### - -package Perl::Tidy::DevNull; -sub new { return bless {}, $_[0] } -sub print { return } -sub close { return } - -##################################################################### -# -# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html -# -##################################################################### - -package Perl::Tidy::HtmlWriter; - -use File::Basename; - -# class variables -use vars qw{ - %html_color - %html_bold - %html_italic - %token_short_names - %short_to_long_names - $rOpts - $css_filename - $css_linkname - $missing_html_entities -}; - -# replace unsafe characters with HTML entity representation if HTML::Entities -# is available -{ eval "use HTML::Entities"; $missing_html_entities = $@; } - -sub new { - - my ( $class, $input_file, $html_file, $extension, $html_toc_extension, - $html_src_extension ) - = @_; - - my $html_file_opened = 0; - my $html_fh; - ( $html_fh, my $html_filename ) = - Perl::Tidy::streamhandle( $html_file, 'w' ); - unless ($html_fh) { - warn("can't open $html_file: $!\n"); - return undef; - } - $html_file_opened = 1; - - if ( !$input_file || $input_file eq '-' || ref($input_file) ) { - $input_file = "NONAME"; - } - - # write the table of contents to a string - my $toc_string; - my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); - - my $html_pre_fh; - my @pre_string_stack; - if ( $rOpts->{'html-pre-only'} ) { - - # pre section goes directly to the output stream - $html_pre_fh = $html_fh; - $html_pre_fh->print( <<"PRE_END"); -
-PRE_END
-    }
-    else {
-
-        # pre section go out to a temporary string
-        my $pre_string;
-        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
-        push @pre_string_stack, \$pre_string;
-    }
-
-    # pod text gets diverted if the 'pod2html' is used
-    my $html_pod_fh;
-    my $pod_string;
-    if ( $rOpts->{'pod2html'} ) {
-        if ( $rOpts->{'html-pre-only'} ) {
-            undef $rOpts->{'pod2html'};
-        }
-        else {
-            eval "use Pod::Html";
-            if ($@) {
-                warn
-"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
-                undef $rOpts->{'pod2html'};
-            }
-            else {
-                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
-            }
-        }
-    }
-
-    my $toc_filename;
-    my $src_filename;
-    if ( $rOpts->{'frames'} ) {
-        unless ($extension) {
-            warn
-"cannot use frames without a specified output extension; ignoring -frm\n";
-            undef $rOpts->{'frames'};
-        }
-        else {
-            $toc_filename = $input_file . $html_toc_extension . $extension;
-            $src_filename = $input_file . $html_src_extension . $extension;
-        }
-    }
-
-    # ----------------------------------------------------------
-    # Output is now directed as follows:
-    # html_toc_fh <-- table of contents items
-    # html_pre_fh <-- the 
 section of formatted code, except:
-    # html_pod_fh <-- pod goes here with the pod2html option
-    # ----------------------------------------------------------
-
-    my $title = $rOpts->{'title'};
-    unless ($title) {
-        ( $title, my $path ) = fileparse($input_file);
-    }
-    my $toc_item_count = 0;
-    my $in_toc_package = "";
-    my $last_level     = 0;
-    bless {
-        _input_file        => $input_file,          # name of input file
-        _title             => $title,               # title, unescaped
-        _html_file         => $html_file,           # name of .html output file
-        _toc_filename      => $toc_filename,        # for frames option
-        _src_filename      => $src_filename,        # for frames option
-        _html_file_opened  => $html_file_opened,    # a flag
-        _html_fh           => $html_fh,             # the output stream
-        _html_pre_fh       => $html_pre_fh,         # pre section goes here
-        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
-        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
-        _rpod_string       => \$pod_string,         # string holding pod
-        _pod_cut_count     => 0,                    # how many =cut's?
-        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
-        _rtoc_string       => \$toc_string,         # string holding toc
-        _rtoc_item_count   => \$toc_item_count,     # how many toc items
-        _rin_toc_package   => \$in_toc_package,     # package name
-        _rtoc_name_count   => {},                   # hash to track unique names
-        _rpackage_stack    => [],                   # stack to check for package
-                                                    # name changes
-        _rlast_level       => \$last_level,         # brace indentation level
-    }, $class;
-}
-
-sub add_toc_item {
-
-    # Add an item to the html table of contents.
-    # This is called even if no table of contents is written,
-    # because we still want to put the anchors in the 
 text.
-    # We are given an anchor name and its type; types are:
-    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
-    # There must be an 'EOF' call at the end to wrap things up.
-    my $self = shift;
-    my ( $name, $type ) = @_;
-    my $html_toc_fh     = $self->{_html_toc_fh};
-    my $html_pre_fh     = $self->{_html_pre_fh};
-    my $rtoc_name_count = $self->{_rtoc_name_count};
-    my $rtoc_item_count = $self->{_rtoc_item_count};
-    my $rlast_level     = $self->{_rlast_level};
-    my $rin_toc_package = $self->{_rin_toc_package};
-    my $rpackage_stack  = $self->{_rpackage_stack};
-
-    # packages contain sublists of subs, so to avoid errors all package
-    # items are written and finished with the following routines
-    my $end_package_list = sub {
-        if ($$rin_toc_package) {
-            $html_toc_fh->print("\n\n");
-            $$rin_toc_package = "";
-        }
-    };
-
-    my $start_package_list = sub {
-        my ( $unique_name, $package ) = @_;
-        if ($$rin_toc_package) { $end_package_list->() }
-        $html_toc_fh->print(<package $package
-
    -EOM - $$rin_toc_package = $package; - }; - - # start the table of contents on the first item - unless ($$rtoc_item_count) { - - # but just quit if we hit EOF without any other entries - # in this case, there will be no toc - return if ( $type eq 'EOF' ); - $html_toc_fh->print( <<"TOC_END"); - -
      -TOC_END - } - $$rtoc_item_count++; - - # make a unique anchor name for this location: - # - packages get a 'package-' prefix - # - subs use their names - my $unique_name = $name; - if ( $type eq 'package' ) { $unique_name = "package-$name" } - - # append '-1', '-2', etc if necessary to make unique; this will - # be unique because subs and packages cannot have a '-' - if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { - $unique_name .= "-$count"; - } - - # - all names get terminal '-' if pod2html is used, to avoid - # conflicts with anchor names created by pod2html - if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } - - # start/stop lists of subs - if ( $type eq 'sub' ) { - my $package = $rpackage_stack->[$$rlast_level]; - unless ($package) { $package = 'main' } - - # if we're already in a package/sub list, be sure its the right - # package or else close it - if ( $$rin_toc_package && $$rin_toc_package ne $package ) { - $end_package_list->(); - } - - # start a package/sub list if necessary - unless ($$rin_toc_package) { - $start_package_list->( $unique_name, $package ); - } - } - - # now write an entry in the toc for this item - if ( $type eq 'package' ) { - $start_package_list->( $unique_name, $name ); - } - elsif ( $type eq 'sub' ) { - $html_toc_fh->print("
    • $name
    • \n"); - } - else { - $end_package_list->(); - $html_toc_fh->print("
    • $name
    • \n"); - } - - # write the anchor in the
       section
      -    $html_pre_fh->print("");
      -
      -    # end the table of contents, if any, on the end of file
      -    if ( $type eq 'EOF' ) {
      -        $html_toc_fh->print( <<"TOC_END");
      -
    - -TOC_END - } -} - -BEGIN { - - # This is the official list of tokens which may be identified by the - # user. Long names are used as getopt keys. Short names are - # convenient short abbreviations for specifying input. Short names - # somewhat resemble token type characters, but are often different - # because they may only be alphanumeric, to allow command line - # input. Also, note that because of case insensitivity of html, - # this table must be in a single case only (I've chosen to use all - # lower case). - # When adding NEW_TOKENS: update this hash table - # short names => long names - %short_to_long_names = ( - 'n' => 'numeric', - 'p' => 'paren', - 'q' => 'quote', - 's' => 'structure', - 'c' => 'comment', - 'v' => 'v-string', - 'cm' => 'comma', - 'w' => 'bareword', - 'co' => 'colon', - 'pu' => 'punctuation', - 'i' => 'identifier', - 'j' => 'label', - 'h' => 'here-doc-target', - 'hh' => 'here-doc-text', - 'k' => 'keyword', - 'sc' => 'semicolon', - 'm' => 'subroutine', - 'pd' => 'pod-text', - ); - - # Now we have to map actual token types into one of the above short - # names; any token types not mapped will get 'punctuation' - # properties. - - # The values of this hash table correspond to the keys of the - # previous hash table. - # The keys of this hash table are token types and can be seen - # by running with --dump-token-types (-dtt). - - # When adding NEW_TOKENS: update this hash table - # $type => $short_name - %token_short_names = ( - '#' => 'c', - 'n' => 'n', - 'v' => 'v', - 'k' => 'k', - 'F' => 'k', - 'Q' => 'q', - 'q' => 'q', - 'J' => 'j', - 'j' => 'j', - 'h' => 'h', - 'H' => 'hh', - 'w' => 'w', - ',' => 'cm', - '=>' => 'cm', - ';' => 'sc', - ':' => 'co', - 'f' => 'sc', - '(' => 'p', - ')' => 'p', - 'M' => 'm', - 'P' => 'pd', - 'A' => 'co', - ); - - # These token types will all be called identifiers for now - # FIXME: need to separate user defined modules as separate type - my @identifier = qw" i t U C Y Z G :: "; - @token_short_names{@identifier} = ('i') x scalar(@identifier); - - # These token types will be called 'structure' - my @structure = qw" { } "; - @token_short_names{@structure} = ('s') x scalar(@structure); - - # OLD NOTES: save for reference - # Any of these could be added later if it would be useful. - # For now, they will by default become punctuation - # my @list = qw" L R [ ] "; - # @token_long_names{@list} = ('non-structure') x scalar(@list); - # - # my @list = qw" - # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm - # "; - # @token_long_names{@list} = ('math') x scalar(@list); - # - # my @list = qw" & &= ~ ~= ^ ^= | |= "; - # @token_long_names{@list} = ('bit') x scalar(@list); - # - # my @list = qw" == != < > <= <=> "; - # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); - # - # my @list = qw" && || ! &&= ||= //= "; - # @token_long_names{@list} = ('logical') x scalar(@list); - # - # my @list = qw" . .= =~ !~ x x= "; - # @token_long_names{@list} = ('string-operators') x scalar(@list); - # - # # Incomplete.. - # my @list = qw" .. -> <> ... \ ? "; - # @token_long_names{@list} = ('misc-operators') x scalar(@list); - -} - -sub make_getopt_long_names { - my $class = shift; - my ($rgetopt_names) = @_; - while ( my ( $short_name, $name ) = each %short_to_long_names ) { - push @$rgetopt_names, "html-color-$name=s"; - push @$rgetopt_names, "html-italic-$name!"; - push @$rgetopt_names, "html-bold-$name!"; - } - push @$rgetopt_names, "html-color-background=s"; - push @$rgetopt_names, "html-linked-style-sheet=s"; - push @$rgetopt_names, "nohtml-style-sheets"; - push @$rgetopt_names, "html-pre-only"; - push @$rgetopt_names, "html-line-numbers"; - push @$rgetopt_names, "html-entities!"; - push @$rgetopt_names, "stylesheet"; - push @$rgetopt_names, "html-table-of-contents!"; - push @$rgetopt_names, "pod2html!"; - push @$rgetopt_names, "frames!"; - push @$rgetopt_names, "html-toc-extension=s"; - push @$rgetopt_names, "html-src-extension=s"; - - # Pod::Html parameters: - push @$rgetopt_names, "backlink=s"; - push @$rgetopt_names, "cachedir=s"; - push @$rgetopt_names, "htmlroot=s"; - push @$rgetopt_names, "libpods=s"; - push @$rgetopt_names, "podpath=s"; - push @$rgetopt_names, "podroot=s"; - push @$rgetopt_names, "title=s"; - - # Pod::Html parameters with leading 'pod' which will be removed - # before the call to Pod::Html - push @$rgetopt_names, "podquiet!"; - push @$rgetopt_names, "podverbose!"; - push @$rgetopt_names, "podrecurse!"; - push @$rgetopt_names, "podflush"; - push @$rgetopt_names, "podheader!"; - push @$rgetopt_names, "podindex!"; -} - -sub make_abbreviated_names { - - # We're appending things like this to the expansion list: - # 'hcc' => [qw(html-color-comment)], - # 'hck' => [qw(html-color-keyword)], - # etc - my $class = shift; - my ($rexpansion) = @_; - - # abbreviations for color/bold/italic properties - while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { - ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; - ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; - ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; - ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; - ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; - } - - # abbreviations for all other html options - ${$rexpansion}{"hcbg"} = ["html-color-background"]; - ${$rexpansion}{"pre"} = ["html-pre-only"]; - ${$rexpansion}{"toc"} = ["html-table-of-contents"]; - ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; - ${$rexpansion}{"nnn"} = ["html-line-numbers"]; - ${$rexpansion}{"hent"} = ["html-entities"]; - ${$rexpansion}{"nhent"} = ["nohtml-entities"]; - ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; - ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; - ${$rexpansion}{"ss"} = ["stylesheet"]; - ${$rexpansion}{"pod"} = ["pod2html"]; - ${$rexpansion}{"npod"} = ["nopod2html"]; - ${$rexpansion}{"frm"} = ["frames"]; - ${$rexpansion}{"nfrm"} = ["noframes"]; - ${$rexpansion}{"text"} = ["html-toc-extension"]; - ${$rexpansion}{"sext"} = ["html-src-extension"]; -} - -sub check_options { - - # This will be called once after options have been parsed - my $class = shift; - $rOpts = shift; - - # X11 color names for default settings that seemed to look ok - # (these color names are only used for programming clarity; the hex - # numbers are actually written) - use constant ForestGreen => "#228B22"; - use constant SaddleBrown => "#8B4513"; - use constant magenta4 => "#8B008B"; - use constant IndianRed3 => "#CD5555"; - use constant DeepSkyBlue4 => "#00688B"; - use constant MediumOrchid3 => "#B452CD"; - use constant black => "#000000"; - use constant white => "#FFFFFF"; - use constant red => "#FF0000"; - - # set default color, bold, italic properties - # anything not listed here will be given the default (punctuation) color -- - # these types currently not listed and get default: ws pu s sc cm co p - # When adding NEW_TOKENS: add an entry here if you don't want defaults - - # set_default_properties( $short_name, default_color, bold?, italic? ); - set_default_properties( 'c', ForestGreen, 0, 0 ); - set_default_properties( 'pd', ForestGreen, 0, 1 ); - set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown - set_default_properties( 'q', IndianRed3, 0, 0 ); - set_default_properties( 'hh', IndianRed3, 0, 1 ); - set_default_properties( 'h', IndianRed3, 1, 0 ); - set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); - set_default_properties( 'w', black, 0, 0 ); - set_default_properties( 'n', MediumOrchid3, 0, 0 ); - set_default_properties( 'v', MediumOrchid3, 0, 0 ); - set_default_properties( 'j', IndianRed3, 1, 0 ); - set_default_properties( 'm', red, 1, 0 ); - - set_default_color( 'html-color-background', white ); - set_default_color( 'html-color-punctuation', black ); - - # setup property lookup tables for tokens based on their short names - # every token type has a short name, and will use these tables - # to do the html markup - while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { - $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; - $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; - $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; - } - - # write style sheet to STDOUT and die if requested - if ( defined( $rOpts->{'stylesheet'} ) ) { - write_style_sheet_file('-'); - exit 1; - } - - # make sure user gives a file name after -css - if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { - $css_linkname = $rOpts->{'html-linked-style-sheet'}; - if ( $css_linkname =~ /^-/ ) { - die "You must specify a valid filename after -css\n"; - } - } - - # check for conflict - if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { - $rOpts->{'nohtml-style-sheets'} = 0; - warning("You can't specify both -css and -nss; -nss ignored\n"); - } - - # write a style sheet file if necessary - if ($css_linkname) { - - # if the selected filename exists, don't write, because user may - # have done some work by hand to create it; use backup name instead - # Also, this will avoid a potential disaster in which the user - # forgets to specify the style sheet, like this: - # perltidy -html -css myfile1.pl myfile2.pl - # This would cause myfile1.pl to parsed as the style sheet by GetOpts - my $css_filename = $css_linkname; - unless ( -e $css_filename ) { - write_style_sheet_file($css_filename); - } - } - $missing_html_entities = 1 unless $rOpts->{'html-entities'}; -} - -sub write_style_sheet_file { - - my $css_filename = shift; - my $fh; - unless ( $fh = IO::File->new("> $css_filename") ) { - die "can't open $css_filename: $!\n"; - } - write_style_sheet_data($fh); - eval { $fh->close }; -} - -sub write_style_sheet_data { - - # write the style sheet data to an open file handle - my $fh = shift; - - my $bg_color = $rOpts->{'html-color-background'}; - my $text_color = $rOpts->{'html-color-punctuation'}; - - # pre-bgcolor is new, and may not be defined - my $pre_bg_color = $rOpts->{'html-pre-color-background'}; - $pre_bg_color = $bg_color unless $pre_bg_color; - - $fh->print(<<"EOM"); -/* default style sheet generated by perltidy */ -body {background: $bg_color; color: $text_color} -pre { color: $text_color; - background: $pre_bg_color; - font-family: courier; - } - -EOM - - foreach my $short_name ( sort keys %short_to_long_names ) { - my $long_name = $short_to_long_names{$short_name}; - - my $abbrev = '.' . $short_name; - if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment - my $color = $html_color{$short_name}; - if ( !defined($color) ) { $color = $text_color } - $fh->print("$abbrev \{ color: $color;"); - - if ( $html_bold{$short_name} ) { - $fh->print(" font-weight:bold;"); - } - - if ( $html_italic{$short_name} ) { - $fh->print(" font-style:italic;"); - } - $fh->print("} /* $long_name */\n"); - } -} - -sub set_default_color { - - # make sure that options hash $rOpts->{$key} contains a valid color - my ( $key, $color ) = @_; - if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } - $rOpts->{$key} = check_RGB($color); -} - -sub check_RGB { - - # if color is a 6 digit hex RGB value, prepend a #, otherwise - # assume that it is a valid ascii color name - my ($color) = @_; - if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } - return $color; -} - -sub set_default_properties { - my ( $short_name, $color, $bold, $italic ) = @_; - - set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); - my $key; - $key = "html-bold-$short_to_long_names{$short_name}"; - $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; - $key = "html-italic-$short_to_long_names{$short_name}"; - $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; -} - -sub pod_to_html { - - # Use Pod::Html to process the pod and make the page - # then merge the perltidy code sections into it. - # return 1 if success, 0 otherwise - my $self = shift; - my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; - my $input_file = $self->{_input_file}; - my $title = $self->{_title}; - my $success_flag = 0; - - # don't try to use pod2html if no pod - unless ($pod_string) { - return $success_flag; - } - - # Pod::Html requires a real temporary filename - # If we are making a frame, we have a name available - # Otherwise, we have to fine one - my $tmpfile; - if ( $rOpts->{'frames'} ) { - $tmpfile = $self->{_toc_filename}; - } - else { - $tmpfile = Perl::Tidy::make_temporary_filename(); - } - my $fh_tmp = IO::File->new( $tmpfile, 'w' ); - unless ($fh_tmp) { - warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; - return $success_flag; - } - - #------------------------------------------------------------------ - # Warning: a temporary file is open; we have to clean up if - # things go bad. From here on all returns should be by going to - # RETURN so that the temporary file gets unlinked. - #------------------------------------------------------------------ - - # write the pod text to the temporary file - $fh_tmp->print($pod_string); - $fh_tmp->close(); - - # Hand off the pod to pod2html. - # Note that we can use the same temporary filename for input and output - # because of the way pod2html works. - { - - my @args; - push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; - my $kw; - - # Flags with string args: - # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", - # "podpath=s", "podroot=s" - # Note: -css=s is handled by perltidy itself - foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { - if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } - } - - # Toggle switches; these have extra leading 'pod' - # "header!", "index!", "recurse!", "quiet!", "verbose!" - foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { - my $kwd = $kw; # allows us to strip 'pod' - if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } - elsif ( defined( $rOpts->{$kw} ) ) { - $kwd =~ s/^pod//; - push @args, "--no$kwd"; - } - } - - # "flush", - $kw = 'podflush'; - if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } - - # Must clean up if pod2html dies (it can); - # Be careful not to overwrite callers __DIE__ routine - local $SIG{__DIE__} = sub { - print $_[0]; - unlink $tmpfile if -e $tmpfile; - exit 1; - }; - - pod2html(@args); - } - $fh_tmp = IO::File->new( $tmpfile, 'r' ); - unless ($fh_tmp) { - - # this error shouldn't happen ... we just used this filename - warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; - goto RETURN; - } - - my $html_fh = $self->{_html_fh}; - my @toc; - my $in_toc; - my $no_print; - - # This routine will write the html selectively and store the toc - my $html_print = sub { - foreach (@_) { - $html_fh->print($_) unless ($no_print); - if ($in_toc) { push @toc, $_ } - } - }; - - # loop over lines of html output from pod2html and merge in - # the necessary perltidy html sections - my ( $saw_body, $saw_index, $saw_body_end ); - while ( my $line = $fh_tmp->getline() ) { - - if ( $line =~ /^\s*\s*$/i ) { - my $date = localtime; - $html_print->("\n"); - $html_print->($line); - } - - # Copy the perltidy css, if any, after tag - elsif ( $line =~ /^\s*\s*$/i ) { - $saw_body = 1; - $html_print->($css_string) if $css_string; - $html_print->($line); - - # add a top anchor and heading - $html_print->("\n"); - $title = escape_html($title); - $html_print->("

    $title

    \n"); - } - elsif ( $line =~ /^\s*\s*$/i ) { - $in_toc = 1; - - # when frames are used, an extra table of contents in the - # contents panel is confusing, so don't print it - $no_print = $rOpts->{'frames'} - || !$rOpts->{'html-table-of-contents'}; - $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; - $html_print->($line); - } - - # Copy the perltidy toc, if any, after the Pod::Html toc - elsif ( $line =~ /^\s*\s*$/i ) { - $saw_index = 1; - $html_print->($line); - if ($toc_string) { - $html_print->("
    \n") if $rOpts->{'frames'}; - $html_print->("

    Code Index:

    \n"); - my @toc = map { $_ .= "\n" } split /\n/, $toc_string; - $html_print->(@toc); - } - $in_toc = 0; - $no_print = 0; - } - - # Copy one perltidy section after each marker - elsif ( $line =~ /^(.*)(.*)$/ ) { - $line = $2; - $html_print->($1) if $1; - - # Intermingle code and pod sections if we saw multiple =cut's. - if ( $self->{_pod_cut_count} > 1 ) { - my $rpre_string = shift(@$rpre_string_stack); - if ($$rpre_string) { - $html_print->('
    ');
    -                    $html_print->($$rpre_string);
    -                    $html_print->('
    '); - } - else { - - # shouldn't happen: we stored a string before writing - # each marker. - warn -"Problem merging html stream with pod2html; order may be wrong\n"; - } - $html_print->($line); - } - - # If didn't see multiple =cut lines, we'll put the pod out first - # and then the code, because it's less confusing. - else { - - # since we are not intermixing code and pod, we don't need - # or want any
    lines which separated pod and code - $html_print->($line) unless ( $line =~ /^\s*
    \s*$/i ); - } - } - - # Copy any remaining code section before the tag - elsif ( $line =~ /^\s*<\/body>\s*$/i ) { - $saw_body_end = 1; - if (@$rpre_string_stack) { - unless ( $self->{_pod_cut_count} > 1 ) { - $html_print->('
    '); - } - while ( my $rpre_string = shift(@$rpre_string_stack) ) { - $html_print->('
    ');
    -                    $html_print->($$rpre_string);
    -                    $html_print->('
    '); - } - } - $html_print->($line); - } - else { - $html_print->($line); - } - } - - $success_flag = 1; - unless ($saw_body) { - warn "Did not see in pod2html output\n"; - $success_flag = 0; - } - unless ($saw_body_end) { - warn "Did not see in pod2html output\n"; - $success_flag = 0; - } - unless ($saw_index) { - warn "Did not find INDEX END in pod2html output\n"; - $success_flag = 0; - } - - RETURN: - eval { $html_fh->close() }; - - # note that we have to unlink tmpfile before making frames - # because the tmpfile may be one of the names used for frames - unlink $tmpfile if -e $tmpfile; - if ( $success_flag && $rOpts->{'frames'} ) { - $self->make_frame( \@toc ); - } - return $success_flag; -} - -sub make_frame { - - # Make a frame with table of contents in the left panel - # and the text in the right panel. - # On entry: - # $html_filename contains the no-frames html output - # $rtoc is a reference to an array with the table of contents - my $self = shift; - my ($rtoc) = @_; - my $input_file = $self->{_input_file}; - my $html_filename = $self->{_html_file}; - my $toc_filename = $self->{_toc_filename}; - my $src_filename = $self->{_src_filename}; - my $title = $self->{_title}; - $title = escape_html($title); - - # FUTURE input parameter: - my $top_basename = ""; - - # We need to produce 3 html files: - # 1. - the table of contents - # 2. - the contents (source code) itself - # 3. - the frame which contains them - - # get basenames for relative links - my ( $toc_basename, $toc_path ) = fileparse($toc_filename); - my ( $src_basename, $src_path ) = fileparse($src_filename); - - # 1. Make the table of contents panel, with appropriate changes - # to the anchor names - my $src_frame_name = 'SRC'; - my $first_anchor = - write_toc_html( $title, $toc_filename, $src_basename, $rtoc, - $src_frame_name ); - - # 2. The current .html filename is renamed to be the contents panel - rename( $html_filename, $src_filename ) - or die "Cannot rename $html_filename to $src_filename:$!\n"; - - # 3. Then use the original html filename for the frame - write_frame_html( - $title, $html_filename, $top_basename, - $toc_basename, $src_basename, $src_frame_name - ); -} - -sub write_toc_html { - - # write a separate html table of contents file for frames - my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; - my $fh = IO::File->new( $toc_filename, 'w' ) - or die "Cannot open $toc_filename:$!\n"; - $fh->print(< - -$title - - -

    $title

    -EOM - - my $first_anchor = - change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); - $fh->print( join "", @$rtoc ); - - $fh->print(< - -EOM - -} - -sub write_frame_html { - - # write an html file to be the table of contents frame - my ( - $title, $frame_filename, $top_basename, - $toc_basename, $src_basename, $src_frame_name - ) = @_; - - my $fh = IO::File->new( $frame_filename, 'w' ) - or die "Cannot open $toc_basename:$!\n"; - - $fh->print(< - - - -$title - -EOM - - # two left panels, one right, if master index file - if ($top_basename) { - $fh->print(< - - - - -EOM - } - - # one left panels, one right, if no master index file - else { - $fh->print(< - -EOM - } - $fh->print(< - -<body> -<p>If you see this message, you are using a non-frame-capable web client.</p> -<p>This document contains:</p> -<ul> -<li><a href="$toc_basename">A table of contents</a></li> -<li><a href="$src_basename">The source code</a></li> -</ul> -</body> - - - -EOM -} - -sub change_anchor_names { - - # add a filename and target to anchors - # also return the first anchor - my ( $rlines, $filename, $target ) = @_; - my $first_anchor; - foreach my $line (@$rlines) { - - # We're looking for lines like this: - #
  • SYNOPSIS
  • - # ---- - -------- ----------------- - # $1 $4 $5 - if ( $line =~ /^(.*)]*>(.*)$/i ) { - my $pre = $1; - my $name = $4; - my $post = $5; - my $href = "$filename#$name"; - $line = "$pre$post\n"; - unless ($first_anchor) { $first_anchor = $href } - } - } - return $first_anchor; -} - -sub close_html_file { - my $self = shift; - return unless $self->{_html_file_opened}; - - my $html_fh = $self->{_html_fh}; - my $rtoc_string = $self->{_rtoc_string}; - - # There are 3 basic paths to html output... - - # --------------------------------- - # Path 1: finish up if in -pre mode - # --------------------------------- - if ( $rOpts->{'html-pre-only'} ) { - $html_fh->print( <<"PRE_END"); -
-PRE_END - eval { $html_fh->close() }; - return; - } - - # Finish the index - $self->add_toc_item( 'EOF', 'EOF' ); - - my $rpre_string_stack = $self->{_rpre_string_stack}; - - # Patch to darken the
 background color in case of pod2html and
-    # interleaved code/documentation.  Otherwise, the distinction
-    # between code and documentation is blurred.
-    if (   $rOpts->{pod2html}
-        && $self->{_pod_cut_count} >= 1
-        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
-    {
-        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
-    }
-
-    # put the css or its link into a string, if used
-    my $css_string;
-    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
-
-    # use css linked to another file
-    if ( $rOpts->{'html-linked-style-sheet'} ) {
-        $fh_css->print(
-            qq()
-        );
-    }
-
-    # use css embedded in this file
-    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
-        $fh_css->print( <<'ENDCSS');
-
-ENDCSS
-    }
-
-    # -----------------------------------------------------------
-    # path 2: use pod2html if requested
-    #         If we fail for some reason, continue on to path 3
-    # -----------------------------------------------------------
-    if ( $rOpts->{'pod2html'} ) {
-        my $rpod_string = $self->{_rpod_string};
-        $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
-            $rpre_string_stack )
-          && return;
-    }
-
-    # --------------------------------------------------
-    # path 3: write code in html, with pod only in italics
-    # --------------------------------------------------
-    my $input_file = $self->{_input_file};
-    my $title      = escape_html($input_file);
-    my $date       = localtime;
-    $html_fh->print( <<"HTML_START");
-
-
-
-
-$title
-HTML_START
-
-    # output the css, if used
-    if ($css_string) {
-        $html_fh->print($css_string);
-        $html_fh->print( <<"ENDCSS");
-
-
-ENDCSS
-    }
-    else {
-
-        $html_fh->print( <<"HTML_START");
-
-{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
-HTML_START
-    }
-
-    $html_fh->print("\n");
-    $html_fh->print( <<"EOM");
-

$title

-EOM - - # copy the table of contents - if ( $$rtoc_string - && !$rOpts->{'frames'} - && $rOpts->{'html-table-of-contents'} ) - { - $html_fh->print($$rtoc_string); - } - - # copy the pre section(s) - my $fname_comment = $input_file; - $fname_comment =~ s/--+/-/g; # protect HTML comment tags - $html_fh->print( <<"END_PRE"); -
- -
-END_PRE
-
-    foreach my $rpre_string (@$rpre_string_stack) {
-        $html_fh->print($$rpre_string);
-    }
-
-    # and finish the html page
-    $html_fh->print( <<"HTML_END");
-
- - -HTML_END - eval { $html_fh->close() }; # could be object without close method - - if ( $rOpts->{'frames'} ) { - my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; - $self->make_frame( \@toc ); - } -} - -sub markup_tokens { - my $self = shift; - my ( $rtokens, $rtoken_type, $rlevels ) = @_; - my ( @colored_tokens, $j, $string, $type, $token, $level ); - my $rlast_level = $self->{_rlast_level}; - my $rpackage_stack = $self->{_rpackage_stack}; - - for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { - $type = $$rtoken_type[$j]; - $token = $$rtokens[$j]; - $level = $$rlevels[$j]; - $level = 0 if ( $level < 0 ); - - #------------------------------------------------------- - # Update the package stack. The package stack is needed to keep - # the toc correct because some packages may be declared within - # blocks and go out of scope when we leave the block. - #------------------------------------------------------- - if ( $level > $$rlast_level ) { - unless ( $rpackage_stack->[ $level - 1 ] ) { - $rpackage_stack->[ $level - 1 ] = 'main'; - } - $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; - } - elsif ( $level < $$rlast_level ) { - my $package = $rpackage_stack->[$level]; - unless ($package) { $package = 'main' } - - # if we change packages due to a nesting change, we - # have to make an entry in the toc - if ( $package ne $rpackage_stack->[ $level + 1 ] ) { - $self->add_toc_item( $package, 'package' ); - } - } - $$rlast_level = $level; - - #------------------------------------------------------- - # Intercept a sub name here; split it - # into keyword 'sub' and sub name; and add an - # entry in the toc - #------------------------------------------------------- - if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { - $token = $self->markup_html_element( $1, 'k' ); - push @colored_tokens, $token; - $token = $2; - $type = 'M'; - - # but don't include sub declarations in the toc; - # these wlll have leading token types 'i;' - my $signature = join "", @$rtoken_type; - unless ( $signature =~ /^i;/ ) { - my $subname = $token; - $subname =~ s/[\s\(].*$//; # remove any attributes and prototype - $self->add_toc_item( $subname, 'sub' ); - } - } - - #------------------------------------------------------- - # Intercept a package name here; split it - # into keyword 'package' and name; add to the toc, - # and update the package stack - #------------------------------------------------------- - if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { - $token = $self->markup_html_element( $1, 'k' ); - push @colored_tokens, $token; - $token = $2; - $type = 'i'; - $self->add_toc_item( "$token", 'package' ); - $rpackage_stack->[$level] = $token; - } - - $token = $self->markup_html_element( $token, $type ); - push @colored_tokens, $token; - } - return ( \@colored_tokens ); -} - -sub markup_html_element { - my $self = shift; - my ( $token, $type ) = @_; - - return $token if ( $type eq 'b' ); # skip a blank token - return $token if ( $token =~ /^\s*$/ ); # skip a blank line - $token = escape_html($token); - - # get the short abbreviation for this token type - my $short_name = $token_short_names{$type}; - if ( !defined($short_name) ) { - $short_name = "pu"; # punctuation is default - } - - # handle style sheets.. - if ( !$rOpts->{'nohtml-style-sheets'} ) { - if ( $short_name ne 'pu' ) { - $token = qq() . $token . ""; - } - } - - # handle no style sheets.. - else { - my $color = $html_color{$short_name}; - - if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { - $token = qq() . $token . ""; - } - if ( $html_italic{$short_name} ) { $token = "$token" } - if ( $html_bold{$short_name} ) { $token = "$token" } - } - return $token; -} - -sub escape_html { - - my $token = shift; - if ($missing_html_entities) { - $token =~ s/\&/&/g; - $token =~ s/\/>/g; - $token =~ s/\"/"/g; - } - else { - HTML::Entities::encode_entities($token); - } - return $token; -} - -sub finish_formatting { - - # called after last line - my $self = shift; - $self->close_html_file(); - return; -} - -sub write_line { - - my $self = shift; - return unless $self->{_html_file_opened}; - my $html_pre_fh = $self->{_html_pre_fh}; - my ($line_of_tokens) = @_; - my $line_type = $line_of_tokens->{_line_type}; - my $input_line = $line_of_tokens->{_line_text}; - my $line_number = $line_of_tokens->{_line_number}; - chomp $input_line; - - # markup line of code.. - my $html_line; - if ( $line_type eq 'CODE' ) { - my $rtoken_type = $line_of_tokens->{_rtoken_type}; - my $rtokens = $line_of_tokens->{_rtokens}; - my $rlevels = $line_of_tokens->{_rlevels}; - - if ( $input_line =~ /(^\s*)/ ) { - $html_line = $1; - } - else { - $html_line = ""; - } - my ($rcolored_tokens) = - $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); - $html_line .= join '', @$rcolored_tokens; - } - - # markup line of non-code.. - else { - my $line_character; - if ( $line_type eq 'HERE' ) { $line_character = 'H' } - elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } - elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } - elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } - elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } - elsif ( $line_type eq 'END_START' ) { - $line_character = 'k'; - $self->add_toc_item( '__END__', '__END__' ); - } - elsif ( $line_type eq 'DATA_START' ) { - $line_character = 'k'; - $self->add_toc_item( '__DATA__', '__DATA__' ); - } - elsif ( $line_type =~ /^POD/ ) { - $line_character = 'P'; - if ( $rOpts->{'pod2html'} ) { - my $html_pod_fh = $self->{_html_pod_fh}; - if ( $line_type eq 'POD_START' ) { - - my $rpre_string_stack = $self->{_rpre_string_stack}; - my $rpre_string = $rpre_string_stack->[-1]; - - # if we have written any non-blank lines to the - # current pre section, start writing to a new output - # string - if ( $$rpre_string =~ /\S/ ) { - my $pre_string; - $html_pre_fh = - Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); - $self->{_html_pre_fh} = $html_pre_fh; - push @$rpre_string_stack, \$pre_string; - - # leave a marker in the pod stream so we know - # where to put the pre section we just - # finished. - my $for_html = '=for html'; # don't confuse pod utils - $html_pod_fh->print(< - -EOM - } - - # otherwise, just clear the current string and start - # over - else { - $$rpre_string = ""; - $html_pod_fh->print("\n"); - } - } - $html_pod_fh->print( $input_line . "\n" ); - if ( $line_type eq 'POD_END' ) { - $self->{_pod_cut_count}++; - $html_pod_fh->print("\n"); - } - return; - } - } - else { $line_character = 'Q' } - $html_line = $self->markup_html_element( $input_line, $line_character ); - } - - # add the line number if requested - if ( $rOpts->{'html-line-numbers'} ) { - my $extra_space .= - ( $line_number < 10 ) ? " " - : ( $line_number < 100 ) ? " " - : ( $line_number < 1000 ) ? " " - : ""; - $html_line = $extra_space . $line_number . " " . $html_line; - } - - # write the line - $html_pre_fh->print("$html_line\n"); -} - -##################################################################### -# -# The Perl::Tidy::Formatter package adds indentation, whitespace, and -# line breaks to the token stream -# -# WARNING: This is not a real class for speed reasons. Only one -# Formatter may be used. -# -##################################################################### - -package Perl::Tidy::Formatter; - -BEGIN { - - # Caution: these debug flags produce a lot of output - # They should all be 0 except when debugging small scripts - use constant FORMATTER_DEBUG_FLAG_BOND => 0; - use constant FORMATTER_DEBUG_FLAG_BREAK => 0; - use constant FORMATTER_DEBUG_FLAG_CI => 0; - use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; - use constant FORMATTER_DEBUG_FLAG_FORCE => 0; - use constant FORMATTER_DEBUG_FLAG_LIST => 0; - use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; - use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; - use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; - use constant FORMATTER_DEBUG_FLAG_STORE => 0; - use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; - use constant FORMATTER_DEBUG_FLAG_WHITE => 0; - - my $debug_warning = sub { - print "FORMATTER_DEBUGGING with key $_[0]\n"; - }; - - FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); - FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); - FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); - FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); - FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); - FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); - FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); - FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); - FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); - FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); - FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); - FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); -} - -use Carp; -use vars qw{ - - @gnu_stack - $max_gnu_stack_index - $gnu_position_predictor - $line_start_index_to_go - $last_indentation_written - $last_unadjusted_indentation - $last_leading_token - - $saw_VERSION_in_this_file - $saw_END_or_DATA_ - - @gnu_item_list - $max_gnu_item_index - $gnu_sequence_number - $last_output_indentation - %last_gnu_equals - %gnu_comma_count - %gnu_arrow_count - - @block_type_to_go - @type_sequence_to_go - @container_environment_to_go - @bond_strength_to_go - @forced_breakpoint_to_go - @lengths_to_go - @levels_to_go - @leading_spaces_to_go - @reduced_spaces_to_go - @matching_token_to_go - @mate_index_to_go - @nesting_blocks_to_go - @ci_levels_to_go - @nesting_depth_to_go - @nobreak_to_go - @old_breakpoint_to_go - @tokens_to_go - @types_to_go - - %saved_opening_indentation - - $max_index_to_go - $comma_count_in_batch - $old_line_count_in_batch - $last_nonblank_index_to_go - $last_nonblank_type_to_go - $last_nonblank_token_to_go - $last_last_nonblank_index_to_go - $last_last_nonblank_type_to_go - $last_last_nonblank_token_to_go - @nonblank_lines_at_depth - $starting_in_quote - $ending_in_quote - - $in_format_skipping_section - $format_skipping_pattern_begin - $format_skipping_pattern_end - - $forced_breakpoint_count - $forced_breakpoint_undo_count - @forced_breakpoint_undo_stack - %postponed_breakpoint - - $tabbing - $embedded_tab_count - $first_embedded_tab_at - $last_embedded_tab_at - $deleted_semicolon_count - $first_deleted_semicolon_at - $last_deleted_semicolon_at - $added_semicolon_count - $first_added_semicolon_at - $last_added_semicolon_at - $first_tabbing_disagreement - $last_tabbing_disagreement - $in_tabbing_disagreement - $tabbing_disagreement_count - $input_line_tabbing - - $last_line_type - $last_line_leading_type - $last_line_leading_level - $last_last_line_leading_level - - %block_leading_text - %block_opening_line_number - $csc_new_statement_ok - $accumulating_text_for_block - $leading_block_text - $rleading_block_if_elsif_text - $leading_block_text_level - $leading_block_text_length_exceeded - $leading_block_text_line_length - $leading_block_text_line_number - $closing_side_comment_prefix_pattern - $closing_side_comment_list_pattern - - $last_nonblank_token - $last_nonblank_type - $last_last_nonblank_token - $last_last_nonblank_type - $last_nonblank_block_type - $last_output_level - %is_do_follower - %is_if_brace_follower - %space_after_keyword - $rbrace_follower - $looking_for_else - %is_last_next_redo_return - %is_other_brace_follower - %is_else_brace_follower - %is_anon_sub_brace_follower - %is_anon_sub_1_brace_follower - %is_sort_map_grep - %is_sort_map_grep_eval - %is_sort_map_grep_eval_do - %is_block_without_semicolon - %is_if_unless - %is_and_or - %is_assignment - %is_chain_operator - %is_if_unless_and_or_last_next_redo_return - %is_until_while_for_if_elsif_else - - @has_broken_sublist - @dont_align - @want_comma_break - - $is_static_block_comment - $index_start_one_line_block - $semicolons_before_block_self_destruct - $index_max_forced_break - $input_line_number - $diagnostics_object - $vertical_aligner_object - $logger_object - $file_writer_object - $formatter_self - @ci_stack - $last_line_had_side_comment - %want_break_before - %outdent_keyword - $static_block_comment_pattern - $static_side_comment_pattern - %opening_vertical_tightness - %closing_vertical_tightness - %closing_token_indentation - - %opening_token_right - %stack_opening_token - %stack_closing_token - - $block_brace_vertical_tightness_pattern - - $rOpts_add_newlines - $rOpts_add_whitespace - $rOpts_block_brace_tightness - $rOpts_block_brace_vertical_tightness - $rOpts_brace_left_and_indent - $rOpts_comma_arrow_breakpoints - $rOpts_break_at_old_keyword_breakpoints - $rOpts_break_at_old_comma_breakpoints - $rOpts_break_at_old_logical_breakpoints - $rOpts_break_at_old_ternary_breakpoints - $rOpts_closing_side_comment_else_flag - $rOpts_closing_side_comment_maximum_text - $rOpts_continuation_indentation - $rOpts_cuddled_else - $rOpts_delete_old_whitespace - $rOpts_fuzzy_line_length - $rOpts_indent_columns - $rOpts_line_up_parentheses - $rOpts_maximum_fields_per_table - $rOpts_maximum_line_length - $rOpts_short_concatenation_item_length - $rOpts_swallow_optional_blank_lines - $rOpts_ignore_old_breakpoints - $rOpts_format_skipping - $rOpts_space_function_paren - $rOpts_space_keyword_paren - $rOpts_keep_interior_semicolons - - $half_maximum_line_length - - %is_opening_type - %is_closing_type - %is_keyword_returning_list - %tightness - %matching_token - $rOpts - %right_bond_strength - %left_bond_strength - %binary_ws_rules - %want_left_space - %want_right_space - %is_digraph - %is_trigraph - $bli_pattern - $bli_list_string - %is_closing_type - %is_opening_type - %is_closing_token - %is_opening_token -}; - -BEGIN { - - # default list of block types for which -bli would apply - $bli_list_string = 'if else elsif unless while for foreach do : sub'; - - @_ = qw( - .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> - <= >= == =~ !~ != ++ -- /= x= - ); - @is_digraph{@_} = (1) x scalar(@_); - - @_ = qw( ... **= <<= >>= &&= ||= //= <=> ); - @is_trigraph{@_} = (1) x scalar(@_); - - @_ = qw( - = **= += *= &= <<= &&= - -= /= |= >>= ||= //= - .= %= ^= - x= - ); - @is_assignment{@_} = (1) x scalar(@_); - - @_ = qw( - grep - keys - map - reverse - sort - split - ); - @is_keyword_returning_list{@_} = (1) x scalar(@_); - - @_ = qw(is if unless and or err last next redo return); - @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); - - # always break after a closing curly of these block types: - @_ = qw(until while for if elsif else); - @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_); - - @_ = qw(last next redo return); - @is_last_next_redo_return{@_} = (1) x scalar(@_); - - @_ = qw(sort map grep); - @is_sort_map_grep{@_} = (1) x scalar(@_); - - @_ = qw(sort map grep eval); - @is_sort_map_grep_eval{@_} = (1) x scalar(@_); - - @_ = qw(sort map grep eval do); - @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); - - @_ = qw(if unless); - @is_if_unless{@_} = (1) x scalar(@_); - - @_ = qw(and or err); - @is_and_or{@_} = (1) x scalar(@_); - - # Identify certain operators which often occur in chains. - # Note: the minus (-) causes a side effect of padding of the first line in - # something like this (by sub set_logical_padding): - # Checkbutton => 'Transmission checked', - # -variable => \$TRANS - # This usually improves appearance so it seems ok. - @_ = qw(&& || and or : ? . + - * /); - @is_chain_operator{@_} = (1) x scalar(@_); - - # We can remove semicolons after blocks preceded by these keywords - @_ = - qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else - unless while until for foreach); - @is_block_without_semicolon{@_} = (1) x scalar(@_); - - # 'L' is token for opening { at hash key - @_ = qw" L { ( [ "; - @is_opening_type{@_} = (1) x scalar(@_); - - # 'R' is token for closing } at hash key - @_ = qw" R } ) ] "; - @is_closing_type{@_} = (1) x scalar(@_); - - @_ = qw" { ( [ "; - @is_opening_token{@_} = (1) x scalar(@_); - - @_ = qw" } ) ] "; - @is_closing_token{@_} = (1) x scalar(@_); -} - -# whitespace codes -use constant WS_YES => 1; -use constant WS_OPTIONAL => 0; -use constant WS_NO => -1; - -# Token bond strengths. -use constant NO_BREAK => 10000; -use constant VERY_STRONG => 100; -use constant STRONG => 2.1; -use constant NOMINAL => 1.1; -use constant WEAK => 0.8; -use constant VERY_WEAK => 0.55; - -# values for testing indexes in output array -use constant UNDEFINED_INDEX => -1; - -# Maximum number of little messages; probably need not be changed. -use constant MAX_NAG_MESSAGES => 6; - -# increment between sequence numbers for each type -# For example, ?: pairs might have numbers 7,11,15,... -use constant TYPE_SEQUENCE_INCREMENT => 4; - -{ - - # methods to count instances - my $_count = 0; - sub get_count { $_count; } - sub _increment_count { ++$_count } - sub _decrement_count { --$_count } -} - -sub trim { - - # trim leading and trailing whitespace from a string - $_[0] =~ s/\s+$//; - $_[0] =~ s/^\s+//; - return $_[0]; -} - -sub split_words { - - # given a string containing words separated by whitespace, - # return the list of words - my ($str) = @_; - return unless $str; - $str =~ s/\s+$//; - $str =~ s/^\s+//; - return split( /\s+/, $str ); -} - -# interface to Perl::Tidy::Logger routines -sub warning { - if ($logger_object) { - $logger_object->warning(@_); - } -} - -sub complain { - if ($logger_object) { - $logger_object->complain(@_); - } -} - -sub write_logfile_entry { - if ($logger_object) { - $logger_object->write_logfile_entry(@_); - } -} - -sub black_box { - if ($logger_object) { - $logger_object->black_box(@_); - } -} - -sub report_definite_bug { - if ($logger_object) { - $logger_object->report_definite_bug(); - } -} - -sub get_saw_brace_error { - if ($logger_object) { - $logger_object->get_saw_brace_error(); - } -} - -sub we_are_at_the_last_line { - if ($logger_object) { - $logger_object->we_are_at_the_last_line(); - } -} - -# interface to Perl::Tidy::Diagnostics routine -sub write_diagnostics { - - if ($diagnostics_object) { - $diagnostics_object->write_diagnostics(@_); - } -} - -sub get_added_semicolon_count { - my $self = shift; - return $added_semicolon_count; -} - -sub DESTROY { - $_[0]->_decrement_count(); -} - -sub new { - - my $class = shift; - - # we are given an object with a write_line() method to take lines - my %defaults = ( - sink_object => undef, - diagnostics_object => undef, - logger_object => undef, - ); - my %args = ( %defaults, @_ ); - - $logger_object = $args{logger_object}; - $diagnostics_object = $args{diagnostics_object}; - - # we create another object with a get_line() and peek_ahead() method - my $sink_object = $args{sink_object}; - $file_writer_object = - Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); - - # initialize the leading whitespace stack to negative levels - # so that we can never run off the end of the stack - $gnu_position_predictor = 0; # where the current token is predicted to be - $max_gnu_stack_index = 0; - $max_gnu_item_index = -1; - $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); - @gnu_item_list = (); - $last_output_indentation = 0; - $last_indentation_written = 0; - $last_unadjusted_indentation = 0; - $last_leading_token = ""; - - $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; - $saw_END_or_DATA_ = 0; - - @block_type_to_go = (); - @type_sequence_to_go = (); - @container_environment_to_go = (); - @bond_strength_to_go = (); - @forced_breakpoint_to_go = (); - @lengths_to_go = (); # line length to start of ith token - @levels_to_go = (); - @matching_token_to_go = (); - @mate_index_to_go = (); - @nesting_blocks_to_go = (); - @ci_levels_to_go = (); - @nesting_depth_to_go = (0); - @nobreak_to_go = (); - @old_breakpoint_to_go = (); - @tokens_to_go = (); - @types_to_go = (); - @leading_spaces_to_go = (); - @reduced_spaces_to_go = (); - - @dont_align = (); - @has_broken_sublist = (); - @want_comma_break = (); - - @ci_stack = (""); - $first_tabbing_disagreement = 0; - $last_tabbing_disagreement = 0; - $tabbing_disagreement_count = 0; - $in_tabbing_disagreement = 0; - $input_line_tabbing = undef; - - $last_line_type = ""; - $last_last_line_leading_level = 0; - $last_line_leading_level = 0; - $last_line_leading_type = '#'; - - $last_nonblank_token = ';'; - $last_nonblank_type = ';'; - $last_last_nonblank_token = ';'; - $last_last_nonblank_type = ';'; - $last_nonblank_block_type = ""; - $last_output_level = 0; - $looking_for_else = 0; - $embedded_tab_count = 0; - $first_embedded_tab_at = 0; - $last_embedded_tab_at = 0; - $deleted_semicolon_count = 0; - $first_deleted_semicolon_at = 0; - $last_deleted_semicolon_at = 0; - $added_semicolon_count = 0; - $first_added_semicolon_at = 0; - $last_added_semicolon_at = 0; - $last_line_had_side_comment = 0; - $is_static_block_comment = 0; - %postponed_breakpoint = (); - - # variables for adding side comments - %block_leading_text = (); - %block_opening_line_number = (); - $csc_new_statement_ok = 1; - - %saved_opening_indentation = (); - $in_format_skipping_section = 0; - - reset_block_text_accumulator(); - - prepare_for_new_input_lines(); - - $vertical_aligner_object = - Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, - $logger_object, $diagnostics_object ); - - if ( $rOpts->{'entab-leading-whitespace'} ) { - write_logfile_entry( -"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" - ); - } - elsif ( $rOpts->{'tabs'} ) { - write_logfile_entry("Indentation will be with a tab character\n"); - } - else { - write_logfile_entry( - "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); - } - - # This was the start of a formatter referent, but object-oriented - # coding has turned out to be too slow here. - $formatter_self = {}; - - bless $formatter_self, $class; - - # Safety check..this is not a class yet - if ( _increment_count() > 1 ) { - confess -"Attempt to create more than 1 object in $class, which is not a true class yet\n"; - } - return $formatter_self; -} - -sub prepare_for_new_input_lines { - - $gnu_sequence_number++; # increment output batch counter - %last_gnu_equals = (); - %gnu_comma_count = (); - %gnu_arrow_count = (); - $line_start_index_to_go = 0; - $max_gnu_item_index = UNDEFINED_INDEX; - $index_max_forced_break = UNDEFINED_INDEX; - $max_index_to_go = UNDEFINED_INDEX; - $last_nonblank_index_to_go = UNDEFINED_INDEX; - $last_nonblank_type_to_go = ''; - $last_nonblank_token_to_go = ''; - $last_last_nonblank_index_to_go = UNDEFINED_INDEX; - $last_last_nonblank_type_to_go = ''; - $last_last_nonblank_token_to_go = ''; - $forced_breakpoint_count = 0; - $forced_breakpoint_undo_count = 0; - $rbrace_follower = undef; - $lengths_to_go[0] = 0; - $old_line_count_in_batch = 1; - $comma_count_in_batch = 0; - $starting_in_quote = 0; - - destroy_one_line_block(); -} - -sub write_line { - - my $self = shift; - my ($line_of_tokens) = @_; - - my $line_type = $line_of_tokens->{_line_type}; - my $input_line = $line_of_tokens->{_line_text}; - - # _line_type codes are: - # SYSTEM - system-specific code before hash-bang line - # CODE - line of perl code (including comments) - # POD_START - line starting pod, such as '=head' - # POD - pod documentation text - # POD_END - last line of pod section, '=cut' - # HERE - text of here-document - # HERE_END - last line of here-doc (target word) - # FORMAT - format section - # FORMAT_END - last line of format section, '.' - # DATA_START - __DATA__ line - # DATA - unidentified text following __DATA__ - # END_START - __END__ line - # END - unidentified text following __END__ - # ERROR - we are in big trouble, probably not a perl script - - # put a blank line after an =cut which comes before __END__ and __DATA__ - # (required by podchecker) - if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { - $file_writer_object->reset_consecutive_blank_lines(); - if ( $input_line !~ /^\s*$/ ) { want_blank_line() } - } - - # handle line of code.. - if ( $line_type eq 'CODE' ) { - - # let logger see all non-blank lines of code - if ( $input_line !~ /^\s*$/ ) { - my $output_line_number = - $vertical_aligner_object->get_output_line_number(); - black_box( $line_of_tokens, $output_line_number ); - } - print_line_of_tokens($line_of_tokens); - } - - # handle line of non-code.. - else { - - # set special flags - my $skip_line = 0; - my $tee_line = 0; - if ( $line_type =~ /^POD/ ) { - - # Pod docs should have a preceding blank line. But be - # very careful in __END__ and __DATA__ sections, because: - # 1. the user may be using this section for any purpose whatsoever - # 2. the blank counters are not active there - # It should be safe to request a blank line between an - # __END__ or __DATA__ and an immediately following '=head' - # type line, (types END_START and DATA_START), but not for - # any other lines of type END or DATA. - if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } - if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } - if ( !$skip_line - && $line_type eq 'POD_START' - && $last_line_type !~ /^(END|DATA)$/ ) - { - want_blank_line(); - } - } - - # leave the blank counters in a predictable state - # after __END__ or __DATA__ - elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { - $file_writer_object->reset_consecutive_blank_lines(); - $saw_END_or_DATA_ = 1; - } - - # write unindented non-code line - if ( !$skip_line ) { - if ($tee_line) { $file_writer_object->tee_on() } - write_unindented_line($input_line); - if ($tee_line) { $file_writer_object->tee_off() } - } - } - $last_line_type = $line_type; -} - -sub create_one_line_block { - $index_start_one_line_block = $_[0]; - $semicolons_before_block_self_destruct = $_[1]; -} - -sub destroy_one_line_block { - $index_start_one_line_block = UNDEFINED_INDEX; - $semicolons_before_block_self_destruct = 0; -} - -sub leading_spaces_to_go { - - # return the number of indentation spaces for a token in the output stream; - # these were previously stored by 'set_leading_whitespace'. - - return get_SPACES( $leading_spaces_to_go[ $_[0] ] ); - -} - -sub get_SPACES { - - # return the number of leading spaces associated with an indentation - # variable $indentation is either a constant number of spaces or an object - # with a get_SPACES method. - my $indentation = shift; - return ref($indentation) ? $indentation->get_SPACES() : $indentation; -} - -sub get_RECOVERABLE_SPACES { - - # return the number of spaces (+ means shift right, - means shift left) - # that we would like to shift a group of lines with the same indentation - # to get them to line up with their opening parens - my $indentation = shift; - return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; -} - -sub get_AVAILABLE_SPACES_to_go { - - my $item = $leading_spaces_to_go[ $_[0] ]; - - # return the number of available leading spaces associated with an - # indentation variable. $indentation is either a constant number of - # spaces or an object with a get_AVAILABLE_SPACES method. - return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; -} - -sub new_lp_indentation_item { - - # this is an interface to the IndentationItem class - my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; - - # A negative level implies not to store the item in the item_list - my $index = 0; - if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } - - my $item = Perl::Tidy::IndentationItem->new( - $spaces, $level, - $ci_level, $available_spaces, - $index, $gnu_sequence_number, - $align_paren, $max_gnu_stack_index, - $line_start_index_to_go, - ); - - if ( $level >= 0 ) { - $gnu_item_list[$max_gnu_item_index] = $item; - } - - return $item; -} - -sub set_leading_whitespace { - - # This routine defines leading whitespace - # given: the level and continuation_level of a token, - # define: space count of leading string which would apply if it - # were the first token of a new line. - - my ( $level, $ci_level, $in_continued_quote ) = @_; - - # modify for -bli, which adds one continuation indentation for - # opening braces - if ( $rOpts_brace_left_and_indent - && $max_index_to_go == 0 - && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) - { - $ci_level++; - } - - # patch to avoid trouble when input file has negative indentation. - # other logic should catch this error. - if ( $level < 0 ) { $level = 0 } - - #------------------------------------------- - # handle the standard indentation scheme - #------------------------------------------- - unless ($rOpts_line_up_parentheses) { - my $space_count = - $ci_level * $rOpts_continuation_indentation + - $level * $rOpts_indent_columns; - my $ci_spaces = - ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; - - if ($in_continued_quote) { - $space_count = 0; - $ci_spaces = 0; - } - $leading_spaces_to_go[$max_index_to_go] = $space_count; - $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; - return; - } - - #------------------------------------------------------------- - # handle case of -lp indentation.. - #------------------------------------------------------------- - - # The continued_quote flag means that this is the first token of a - # line, and it is the continuation of some kind of multi-line quote - # or pattern. It requires special treatment because it must have no - # added leading whitespace. So we create a special indentation item - # which is not in the stack. - if ($in_continued_quote) { - my $space_count = 0; - my $available_space = 0; - $level = -1; # flag to prevent storing in item_list - $leading_spaces_to_go[$max_index_to_go] = - $reduced_spaces_to_go[$max_index_to_go] = - new_lp_indentation_item( $space_count, $level, $ci_level, - $available_space, 0 ); - return; - } - - # get the top state from the stack - my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); - my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); - my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); - - my $type = $types_to_go[$max_index_to_go]; - my $token = $tokens_to_go[$max_index_to_go]; - my $total_depth = $nesting_depth_to_go[$max_index_to_go]; - - if ( $type eq '{' || $type eq '(' ) { - - $gnu_comma_count{ $total_depth + 1 } = 0; - $gnu_arrow_count{ $total_depth + 1 } = 0; - - # If we come to an opening token after an '=' token of some type, - # see if it would be helpful to 'break' after the '=' to save space - my $last_equals = $last_gnu_equals{$total_depth}; - if ( $last_equals && $last_equals > $line_start_index_to_go ) { - - # find the position if we break at the '=' - my $i_test = $last_equals; - if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } - - # TESTING - ##my $too_close = ($i_test==$max_index_to_go-1); - - my $test_position = total_line_length( $i_test, $max_index_to_go ); - - if ( - - # the equals is not just before an open paren (testing) - ##!$too_close && - - # if we are beyond the midpoint - $gnu_position_predictor > $half_maximum_line_length - - # or we are beyont the 1/4 point and there was an old - # break at the equals - || ( - $gnu_position_predictor > $half_maximum_line_length / 2 - && ( - $old_breakpoint_to_go[$last_equals] - || ( $last_equals > 0 - && $old_breakpoint_to_go[ $last_equals - 1 ] ) - || ( $last_equals > 1 - && $types_to_go[ $last_equals - 1 ] eq 'b' - && $old_breakpoint_to_go[ $last_equals - 2 ] ) - ) - ) - ) - { - - # then make the switch -- note that we do not set a real - # breakpoint here because we may not really need one; sub - # scan_list will do that if necessary - $line_start_index_to_go = $i_test + 1; - $gnu_position_predictor = $test_position; - } - } - } - - # Check for decreasing depth .. - # Note that one token may have both decreasing and then increasing - # depth. For example, (level, ci) can go from (1,1) to (2,0). So, - # in this example we would first go back to (1,0) then up to (2,0) - # in a single call. - if ( $level < $current_level || $ci_level < $current_ci_level ) { - - # loop to find the first entry at or completely below this level - my ( $lev, $ci_lev ); - while (1) { - if ($max_gnu_stack_index) { - - # save index of token which closes this level - $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); - - # Undo any extra indentation if we saw no commas - my $available_spaces = - $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); - - my $comma_count = 0; - my $arrow_count = 0; - if ( $type eq '}' || $type eq ')' ) { - $comma_count = $gnu_comma_count{$total_depth}; - $arrow_count = $gnu_arrow_count{$total_depth}; - $comma_count = 0 unless $comma_count; - $arrow_count = 0 unless $arrow_count; - } - $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); - $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); - - if ( $available_spaces > 0 ) { - - if ( $comma_count <= 0 || $arrow_count > 0 ) { - - my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); - my $seqno = - $gnu_stack[$max_gnu_stack_index] - ->get_SEQUENCE_NUMBER(); - - # Be sure this item was created in this batch. This - # should be true because we delete any available - # space from open items at the end of each batch. - if ( $gnu_sequence_number != $seqno - || $i > $max_gnu_item_index ) - { - warning( -"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" - ); - report_definite_bug(); - } - - else { - if ( $arrow_count == 0 ) { - $gnu_item_list[$i] - ->permanently_decrease_AVAILABLE_SPACES( - $available_spaces); - } - else { - $gnu_item_list[$i] - ->tentatively_decrease_AVAILABLE_SPACES( - $available_spaces); - } - - my $j; - for ( - $j = $i + 1 ; - $j <= $max_gnu_item_index ; - $j++ - ) - { - $gnu_item_list[$j] - ->decrease_SPACES($available_spaces); - } - } - } - } - - # go down one level - --$max_gnu_stack_index; - $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); - $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); - - # stop when we reach a level at or below the current level - if ( $lev <= $level && $ci_lev <= $ci_level ) { - $space_count = - $gnu_stack[$max_gnu_stack_index]->get_SPACES(); - $current_level = $lev; - $current_ci_level = $ci_lev; - last; - } - } - - # reached bottom of stack .. should never happen because - # only negative levels can get here, and $level was forced - # to be positive above. - else { - warning( -"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" - ); - report_definite_bug(); - last; - } - } - } - - # handle increasing depth - if ( $level > $current_level || $ci_level > $current_ci_level ) { - - # Compute the standard incremental whitespace. This will be - # the minimum incremental whitespace that will be used. This - # choice results in a smooth transition between the gnu-style - # and the standard style. - my $standard_increment = - ( $level - $current_level ) * $rOpts_indent_columns + - ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; - - # Now we have to define how much extra incremental space - # ("$available_space") we want. This extra space will be - # reduced as necessary when long lines are encountered or when - # it becomes clear that we do not have a good list. - my $available_space = 0; - my $align_paren = 0; - my $excess = 0; - - # initialization on empty stack.. - if ( $max_gnu_stack_index == 0 ) { - $space_count = $level * $rOpts_indent_columns; - } - - # if this is a BLOCK, add the standard increment - elsif ($last_nonblank_block_type) { - $space_count += $standard_increment; - } - - # if last nonblank token was not structural indentation, - # just use standard increment - elsif ( $last_nonblank_type ne '{' ) { - $space_count += $standard_increment; - } - - # otherwise use the space to the first non-blank level change token - else { - - $space_count = $gnu_position_predictor; - - my $min_gnu_indentation = - $gnu_stack[$max_gnu_stack_index]->get_SPACES(); - - $available_space = $space_count - $min_gnu_indentation; - if ( $available_space >= $standard_increment ) { - $min_gnu_indentation += $standard_increment; - } - elsif ( $available_space > 1 ) { - $min_gnu_indentation += $available_space + 1; - } - elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { - if ( ( $tightness{$last_nonblank_token} < 2 ) ) { - $min_gnu_indentation += 2; - } - else { - $min_gnu_indentation += 1; - } - } - else { - $min_gnu_indentation += $standard_increment; - } - $available_space = $space_count - $min_gnu_indentation; - - if ( $available_space < 0 ) { - $space_count = $min_gnu_indentation; - $available_space = 0; - } - $align_paren = 1; - } - - # update state, but not on a blank token - if ( $types_to_go[$max_index_to_go] ne 'b' ) { - - $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); - - ++$max_gnu_stack_index; - $gnu_stack[$max_gnu_stack_index] = - new_lp_indentation_item( $space_count, $level, $ci_level, - $available_space, $align_paren ); - - # If the opening paren is beyond the half-line length, then - # we will use the minimum (standard) indentation. This will - # help avoid problems associated with running out of space - # near the end of a line. As a result, in deeply nested - # lists, there will be some indentations which are limited - # to this minimum standard indentation. But the most deeply - # nested container will still probably be able to shift its - # parameters to the right for proper alignment, so in most - # cases this will not be noticable. - if ( $available_space > 0 - && $space_count > $half_maximum_line_length ) - { - $gnu_stack[$max_gnu_stack_index] - ->tentatively_decrease_AVAILABLE_SPACES($available_space); - } - } - } - - # Count commas and look for non-list characters. Once we see a - # non-list character, we give up and don't look for any more commas. - if ( $type eq '=>' ) { - $gnu_arrow_count{$total_depth}++; - - # tentatively treating '=>' like '=' for estimating breaks - # TODO: this could use some experimentation - $last_gnu_equals{$total_depth} = $max_index_to_go; - } - - elsif ( $type eq ',' ) { - $gnu_comma_count{$total_depth}++; - } - - elsif ( $is_assignment{$type} ) { - $last_gnu_equals{$total_depth} = $max_index_to_go; - } - - # this token might start a new line - # if this is a non-blank.. - if ( $type ne 'b' ) { - - # and if .. - if ( - - # this is the first nonblank token of the line - $max_index_to_go == 1 && $types_to_go[0] eq 'b' - - # or previous character was one of these: - || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ - - # or previous character was opening and this does not close it - || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) - || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) - - # or this token is one of these: - || $type =~ /^([\.]|\|\||\&\&)$/ - - # or this is a closing structure - || ( $last_nonblank_type_to_go eq '}' - && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) - - # or previous token was keyword 'return' - || ( $last_nonblank_type_to_go eq 'k' - && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) - - # or starting a new line at certain keywords is fine - || ( $type eq 'k' - && $is_if_unless_and_or_last_next_redo_return{$token} ) - - # or this is after an assignment after a closing structure - || ( - $is_assignment{$last_nonblank_type_to_go} - && ( - $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ - - # and it is significantly to the right - || $gnu_position_predictor > $half_maximum_line_length - ) - ) - ) - { - check_for_long_gnu_style_lines(); - $line_start_index_to_go = $max_index_to_go; - - # back up 1 token if we want to break before that type - # otherwise, we may strand tokens like '?' or ':' on a line - if ( $line_start_index_to_go > 0 ) { - if ( $last_nonblank_type_to_go eq 'k' ) { - - if ( $want_break_before{$last_nonblank_token_to_go} ) { - $line_start_index_to_go--; - } - } - elsif ( $want_break_before{$last_nonblank_type_to_go} ) { - $line_start_index_to_go--; - } - } - } - } - - # remember the predicted position of this token on the output line - if ( $max_index_to_go > $line_start_index_to_go ) { - $gnu_position_predictor = - total_line_length( $line_start_index_to_go, $max_index_to_go ); - } - else { - $gnu_position_predictor = $space_count + - token_sequence_length( $max_index_to_go, $max_index_to_go ); - } - - # store the indentation object for this token - # this allows us to manipulate the leading whitespace - # (in case we have to reduce indentation to fit a line) without - # having to change any token values - $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; - $reduced_spaces_to_go[$max_index_to_go] = - ( $max_gnu_stack_index > 0 && $ci_level ) - ? $gnu_stack[ $max_gnu_stack_index - 1 ] - : $gnu_stack[$max_gnu_stack_index]; - return; -} - -sub check_for_long_gnu_style_lines { - - # look at the current estimated maximum line length, and - # remove some whitespace if it exceeds the desired maximum - - # this is only for the '-lp' style - return unless ($rOpts_line_up_parentheses); - - # nothing can be done if no stack items defined for this line - return if ( $max_gnu_item_index == UNDEFINED_INDEX ); - - # see if we have exceeded the maximum desired line length - # keep 2 extra free because they are needed in some cases - # (result of trial-and-error testing) - my $spaces_needed = - $gnu_position_predictor - $rOpts_maximum_line_length + 2; - - return if ( $spaces_needed < 0 ); - - # We are over the limit, so try to remove a requested number of - # spaces from leading whitespace. We are only allowed to remove - # from whitespace items created on this batch, since others have - # already been used and cannot be undone. - my @candidates = (); - my $i; - - # loop over all whitespace items created for the current batch - for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { - my $item = $gnu_item_list[$i]; - - # item must still be open to be a candidate (otherwise it - # cannot influence the current token) - next if ( $item->get_CLOSED() >= 0 ); - - my $available_spaces = $item->get_AVAILABLE_SPACES(); - - if ( $available_spaces > 0 ) { - push( @candidates, [ $i, $available_spaces ] ); - } - } - - return unless (@candidates); - - # sort by available whitespace so that we can remove whitespace - # from the maximum available first - @candidates = sort { $b->[1] <=> $a->[1] } @candidates; - - # keep removing whitespace until we are done or have no more - my $candidate; - foreach $candidate (@candidates) { - my ( $i, $available_spaces ) = @{$candidate}; - my $deleted_spaces = - ( $available_spaces > $spaces_needed ) - ? $spaces_needed - : $available_spaces; - - # remove the incremental space from this item - $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); - - my $i_debug = $i; - - # update the leading whitespace of this item and all items - # that came after it - for ( ; $i <= $max_gnu_item_index ; $i++ ) { - - my $old_spaces = $gnu_item_list[$i]->get_SPACES(); - if ( $old_spaces > $deleted_spaces ) { - $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); - } - - # shouldn't happen except for code bug: - else { - my $level = $gnu_item_list[$i_debug]->get_LEVEL(); - my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); - my $old_level = $gnu_item_list[$i]->get_LEVEL(); - my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); - warning( -"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" - ); - report_definite_bug(); - } - } - $gnu_position_predictor -= $deleted_spaces; - $spaces_needed -= $deleted_spaces; - last unless ( $spaces_needed > 0 ); - } -} - -sub finish_lp_batch { - - # This routine is called once after each each output stream batch is - # finished to undo indentation for all incomplete -lp - # indentation levels. It is too risky to leave a level open, - # because then we can't backtrack in case of a long line to follow. - # This means that comments and blank lines will disrupt this - # indentation style. But the vertical aligner may be able to - # get the space back if there are side comments. - - # this is only for the 'lp' style - return unless ($rOpts_line_up_parentheses); - - # nothing can be done if no stack items defined for this line - return if ( $max_gnu_item_index == UNDEFINED_INDEX ); - - # loop over all whitespace items created for the current batch - my $i; - for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { - my $item = $gnu_item_list[$i]; - - # only look for open items - next if ( $item->get_CLOSED() >= 0 ); - - # Tentatively remove all of the available space - # (The vertical aligner will try to get it back later) - my $available_spaces = $item->get_AVAILABLE_SPACES(); - if ( $available_spaces > 0 ) { - - # delete incremental space for this item - $gnu_item_list[$i] - ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); - - # Reduce the total indentation space of any nodes that follow - # Note that any such nodes must necessarily be dependents - # of this node. - foreach ( $i + 1 .. $max_gnu_item_index ) { - $gnu_item_list[$_]->decrease_SPACES($available_spaces); - } - } - } - return; -} - -sub reduce_lp_indentation { - - # reduce the leading whitespace at token $i if possible by $spaces_needed - # (a large value of $spaces_needed will remove all excess space) - # NOTE: to be called from scan_list only for a sequence of tokens - # contained between opening and closing parens/braces/brackets - - my ( $i, $spaces_wanted ) = @_; - my $deleted_spaces = 0; - - my $item = $leading_spaces_to_go[$i]; - my $available_spaces = $item->get_AVAILABLE_SPACES(); - - if ( - $available_spaces > 0 - && ( ( $spaces_wanted <= $available_spaces ) - || !$item->get_HAVE_CHILD() ) - ) - { - - # we'll remove these spaces, but mark them as recoverable - $deleted_spaces = - $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted); - } - - return $deleted_spaces; -} - -sub token_sequence_length { - - # return length of tokens ($ifirst .. $ilast) including first & last - # returns 0 if $ifirst > $ilast - my $ifirst = shift; - my $ilast = shift; - return 0 if ( $ilast < 0 || $ifirst > $ilast ); - return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 ); - return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst]; -} - -sub total_line_length { - - # return length of a line of tokens ($ifirst .. $ilast) - my $ifirst = shift; - my $ilast = shift; - if ( $ifirst < 0 ) { $ifirst = 0 } - - return leading_spaces_to_go($ifirst) + - token_sequence_length( $ifirst, $ilast ); -} - -sub excess_line_length { - - # return number of characters by which a line of tokens ($ifirst..$ilast) - # exceeds the allowable line length. - my $ifirst = shift; - my $ilast = shift; - if ( $ifirst < 0 ) { $ifirst = 0 } - return leading_spaces_to_go($ifirst) + - token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length; -} - -sub finish_formatting { - - # flush buffer and write any informative messages - my $self = shift; - - flush(); - $file_writer_object->decrement_output_line_number() - ; # fix up line number since it was incremented - we_are_at_the_last_line(); - if ( $added_semicolon_count > 0 ) { - my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; - my $what = - ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; - write_logfile_entry("$added_semicolon_count $what added:\n"); - write_logfile_entry( - " $first at input line $first_added_semicolon_at\n"); - - if ( $added_semicolon_count > 1 ) { - write_logfile_entry( - " Last at input line $last_added_semicolon_at\n"); - } - write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); - write_logfile_entry("\n"); - } - - if ( $deleted_semicolon_count > 0 ) { - my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; - my $what = - ( $deleted_semicolon_count > 1 ) - ? "semicolons were" - : "semicolon was"; - write_logfile_entry( - "$deleted_semicolon_count unnecessary $what deleted:\n"); - write_logfile_entry( - " $first at input line $first_deleted_semicolon_at\n"); - - if ( $deleted_semicolon_count > 1 ) { - write_logfile_entry( - " Last at input line $last_deleted_semicolon_at\n"); - } - write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); - write_logfile_entry("\n"); - } - - if ( $embedded_tab_count > 0 ) { - my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; - my $what = - ( $embedded_tab_count > 1 ) - ? "quotes or patterns" - : "quote or pattern"; - write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); - write_logfile_entry( -"This means the display of this script could vary with device or software\n" - ); - write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); - - if ( $embedded_tab_count > 1 ) { - write_logfile_entry( - " Last at input line $last_embedded_tab_at\n"); - } - write_logfile_entry("\n"); - } - - if ($first_tabbing_disagreement) { - write_logfile_entry( -"First indentation disagreement seen at input line $first_tabbing_disagreement\n" - ); - } - - if ($in_tabbing_disagreement) { - write_logfile_entry( -"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" - ); - } - else { - - if ($last_tabbing_disagreement) { - - write_logfile_entry( -"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" - ); - } - else { - write_logfile_entry("No indentation disagreement seen\n"); - } - } - write_logfile_entry("\n"); - - $vertical_aligner_object->report_anything_unusual(); - - $file_writer_object->report_line_length_errors(); -} - -sub check_options { - - # This routine is called to check the Opts hash after it is defined - - ($rOpts) = @_; - my ( $tabbing_string, $tab_msg ); - - make_static_block_comment_pattern(); - make_static_side_comment_pattern(); - make_closing_side_comment_prefix(); - make_closing_side_comment_list_pattern(); - $format_skipping_pattern_begin = - make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); - $format_skipping_pattern_end = - make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); - - # If closing side comments ARE selected, then we can safely - # delete old closing side comments unless closing side comment - # warnings are requested. This is a good idea because it will - # eliminate any old csc's which fall below the line count threshold. - # We cannot do this if warnings are turned on, though, because we - # might delete some text which has been added. So that must - # be handled when comments are created. - if ( $rOpts->{'closing-side-comments'} ) { - if ( !$rOpts->{'closing-side-comment-warnings'} ) { - $rOpts->{'delete-closing-side-comments'} = 1; - } - } - - # If closing side comments ARE NOT selected, but warnings ARE - # selected and we ARE DELETING csc's, then we will pretend to be - # adding with a huge interval. This will force the comments to be - # generated for comparison with the old comments, but not added. - elsif ( $rOpts->{'closing-side-comment-warnings'} ) { - if ( $rOpts->{'delete-closing-side-comments'} ) { - $rOpts->{'delete-closing-side-comments'} = 0; - $rOpts->{'closing-side-comments'} = 1; - $rOpts->{'closing-side-comment-interval'} = 100000000; - } - } - - make_bli_pattern(); - make_block_brace_vertical_tightness_pattern(); - - if ( $rOpts->{'line-up-parentheses'} ) { - - if ( $rOpts->{'indent-only'} - || !$rOpts->{'add-newlines'} - || !$rOpts->{'delete-old-newlines'} ) - { - warn <{'line-up-parentheses'} = 0; - } - } - - # At present, tabs are not compatable with the line-up-parentheses style - # (it would be possible to entab the total leading whitespace - # just prior to writing the line, if desired). - if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { - warn <{'tabs'} = 0; - } - - # Likewise, tabs are not compatable with outdenting.. - if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { - warn <{'tabs'} = 0; - } - - if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { - warn <{'tabs'} = 0; - } - - if ( !$rOpts->{'space-for-semicolon'} ) { - $want_left_space{'f'} = -1; - } - - if ( $rOpts->{'space-terminal-semicolon'} ) { - $want_left_space{';'} = 1; - } - - # implement outdenting preferences for keywords - %outdent_keyword = (); - unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { - @_ = qw(next last redo goto return); # defaults - } - - # FUTURE: if not a keyword, assume that it is an identifier - foreach (@_) { - if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { - $outdent_keyword{$_} = 1; - } - else { - warn "ignoring '$_' in -okwl list; not a perl keyword"; - } - } - - # implement user whitespace preferences - if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { - @want_left_space{@_} = (1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { - @want_right_space{@_} = (1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { - @want_left_space{@_} = (-1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { - @want_right_space{@_} = (-1) x scalar(@_); - } - if ( $rOpts->{'dump-want-left-space'} ) { - dump_want_left_space(*STDOUT); - exit 1; - } - - if ( $rOpts->{'dump-want-right-space'} ) { - dump_want_right_space(*STDOUT); - exit 1; - } - - # default keywords for which space is introduced before an opening paren - # (at present, including them messes up vertical alignment) - @_ = qw(my local our and or err eq ne if else elsif until - unless while for foreach return switch case given when); - @space_after_keyword{@_} = (1) x scalar(@_); - - # allow user to modify these defaults - if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { - @space_after_keyword{@_} = (1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { - @space_after_keyword{@_} = (0) x scalar(@_); - } - - # implement user break preferences - my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - . : ? && || and or err xor - ); - - my $break_after = sub { - foreach my $tok (@_) { - if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); - } - } - }; - - my $break_before = sub { - foreach my $tok (@_) { - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); - } - } - }; - - $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); - $break_before->(@all_operators) - if ( $rOpts->{'break-before-all-operators'} ); - - $break_after->( split_words( $rOpts->{'want-break-after'} ) ); - $break_before->( split_words( $rOpts->{'want-break-before'} ) ); - - # make note if breaks are before certain key types - %want_break_before = (); - foreach my $tok ( @all_operators, ',' ) { - $want_break_before{$tok} = - $left_bond_strength{$tok} < $right_bond_strength{$tok}; - } - - # Coordinate ?/: breaks, which must be similar - if ( !$want_break_before{':'} ) { - $want_break_before{'?'} = $want_break_before{':'}; - $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; - $left_bond_strength{'?'} = NO_BREAK; - } - - # Define here tokens which may follow the closing brace of a do statement - # on the same line, as in: - # } while ( $something); - @_ = qw(until while unless if ; : ); - push @_, ','; - @is_do_follower{@_} = (1) x scalar(@_); - - # These tokens may follow the closing brace of an if or elsif block. - # In other words, for cuddled else we want code to look like: - # } elsif ( $something) { - # } else { - if ( $rOpts->{'cuddled-else'} ) { - @_ = qw(else elsif); - @is_if_brace_follower{@_} = (1) x scalar(@_); - } - else { - %is_if_brace_follower = (); - } - - # nothing can follow the closing curly of an else { } block: - %is_else_brace_follower = (); - - # what can follow a multi-line anonymous sub definition closing curly: - @_ = qw# ; : => or and && || ~~ !~~ ) #; - push @_, ','; - @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); - - # what can follow a one-line anonynomous sub closing curly: - # one-line anonumous subs also have ']' here... - # see tk3.t and PP.pm - @_ = qw# ; : => or and && || ) ] ~~ !~~ #; - push @_, ','; - @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); - - # What can follow a closing curly of a block - # which is not an if/elsif/else/do/sort/map/grep/eval/sub - # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' - @_ = qw# ; : => or and && || ) #; - push @_, ','; - - # allow cuddled continue if cuddled else is specified - if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } - - @is_other_brace_follower{@_} = (1) x scalar(@_); - - $right_bond_strength{'{'} = WEAK; - $left_bond_strength{'{'} = VERY_STRONG; - - # make -l=0 equal to -l=infinite - if ( !$rOpts->{'maximum-line-length'} ) { - $rOpts->{'maximum-line-length'} = 1000000; - } - - # make -lbl=0 equal to -lbl=infinite - if ( !$rOpts->{'long-block-line-count'} ) { - $rOpts->{'long-block-line-count'} = 1000000; - } - - my $ole = $rOpts->{'output-line-ending'}; - if ($ole) { - my %endings = ( - dos => "\015\012", - win => "\015\012", - mac => "\015", - unix => "\012", - ); - $ole = lc $ole; - unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { - my $str = join " ", keys %endings; - die <{'preserve-line-endings'} ) { - warn "Ignoring -ple; conflicts with -ole\n"; - $rOpts->{'preserve-line-endings'} = undef; - } - } - - # hashes used to simplify setting whitespace - %tightness = ( - '{' => $rOpts->{'brace-tightness'}, - '}' => $rOpts->{'brace-tightness'}, - '(' => $rOpts->{'paren-tightness'}, - ')' => $rOpts->{'paren-tightness'}, - '[' => $rOpts->{'square-bracket-tightness'}, - ']' => $rOpts->{'square-bracket-tightness'}, - ); - %matching_token = ( - '{' => '}', - '(' => ')', - '[' => ']', - '?' => ':', - ); - - # frequently used parameters - $rOpts_add_newlines = $rOpts->{'add-newlines'}; - $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; - $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; - $rOpts_block_brace_vertical_tightness = - $rOpts->{'block-brace-vertical-tightness'}; - $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; - $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; - $rOpts_break_at_old_ternary_breakpoints = - $rOpts->{'break-at-old-ternary-breakpoints'}; - $rOpts_break_at_old_comma_breakpoints = - $rOpts->{'break-at-old-comma-breakpoints'}; - $rOpts_break_at_old_keyword_breakpoints = - $rOpts->{'break-at-old-keyword-breakpoints'}; - $rOpts_break_at_old_logical_breakpoints = - $rOpts->{'break-at-old-logical-breakpoints'}; - $rOpts_closing_side_comment_else_flag = - $rOpts->{'closing-side-comment-else-flag'}; - $rOpts_closing_side_comment_maximum_text = - $rOpts->{'closing-side-comment-maximum-text'}; - $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; - $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; - $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; - $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; - $rOpts_indent_columns = $rOpts->{'indent-columns'}; - $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; - $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; - $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_short_concatenation_item_length = - $rOpts->{'short-concatenation-item-length'}; - $rOpts_swallow_optional_blank_lines = - $rOpts->{'swallow-optional-blank-lines'}; - $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; - $rOpts_format_skipping = $rOpts->{'format-skipping'}; - $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; - $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; - $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; - $half_maximum_line_length = $rOpts_maximum_line_length / 2; - - # Note that both opening and closing tokens can access the opening - # and closing flags of their container types. - %opening_vertical_tightness = ( - '(' => $rOpts->{'paren-vertical-tightness'}, - '{' => $rOpts->{'brace-vertical-tightness'}, - '[' => $rOpts->{'square-bracket-vertical-tightness'}, - ')' => $rOpts->{'paren-vertical-tightness'}, - '}' => $rOpts->{'brace-vertical-tightness'}, - ']' => $rOpts->{'square-bracket-vertical-tightness'}, - ); - - %closing_vertical_tightness = ( - '(' => $rOpts->{'paren-vertical-tightness-closing'}, - '{' => $rOpts->{'brace-vertical-tightness-closing'}, - '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, - ')' => $rOpts->{'paren-vertical-tightness-closing'}, - '}' => $rOpts->{'brace-vertical-tightness-closing'}, - ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, - ); - - # assume flag for '>' same as ')' for closing qw quotes - %closing_token_indentation = ( - ')' => $rOpts->{'closing-paren-indentation'}, - '}' => $rOpts->{'closing-brace-indentation'}, - ']' => $rOpts->{'closing-square-bracket-indentation'}, - '>' => $rOpts->{'closing-paren-indentation'}, - ); - - %opening_token_right = ( - '(' => $rOpts->{'opening-paren-right'}, - '{' => $rOpts->{'opening-hash-brace-right'}, - '[' => $rOpts->{'opening-square-bracket-right'}, - ); - - %stack_opening_token = ( - '(' => $rOpts->{'stack-opening-paren'}, - '{' => $rOpts->{'stack-opening-hash-brace'}, - '[' => $rOpts->{'stack-opening-square-bracket'}, - ); - - %stack_closing_token = ( - ')' => $rOpts->{'stack-closing-paren'}, - '}' => $rOpts->{'stack-closing-hash-brace'}, - ']' => $rOpts->{'stack-closing-square-bracket'}, - ); -} - -sub make_static_block_comment_pattern { - - # create the pattern used to identify static block comments - $static_block_comment_pattern = '^\s*##'; - - # allow the user to change it - if ( $rOpts->{'static-block-comment-prefix'} ) { - my $prefix = $rOpts->{'static-block-comment-prefix'}; - $prefix =~ s/^\s*//; - my $pattern = $prefix; - - # user may give leading caret to force matching left comments only - if ( $prefix !~ /^\^#/ ) { - if ( $prefix !~ /^#/ ) { - die -"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; - } - $pattern = '^\s*' . $prefix; - } - eval "'##'=~/$pattern/"; - if ($@) { - die -"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; - } - $static_block_comment_pattern = $pattern; - } -} - -sub make_format_skipping_pattern { - my ( $opt_name, $default ) = @_; - my $param = $rOpts->{$opt_name}; - unless ($param) { $param = $default } - $param =~ s/^\s*//; - if ( $param !~ /^#/ ) { - die "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; - } - my $pattern = '^' . $param . '\s'; - eval "'#'=~/$pattern/"; - if ($@) { - die -"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; - } - return $pattern; -} - -sub make_closing_side_comment_list_pattern { - - # turn any input list into a regex for recognizing selected block types - $closing_side_comment_list_pattern = '^\w+'; - if ( defined( $rOpts->{'closing-side-comment-list'} ) - && $rOpts->{'closing-side-comment-list'} ) - { - $closing_side_comment_list_pattern = - make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); - } -} - -sub make_bli_pattern { - - if ( defined( $rOpts->{'brace-left-and-indent-list'} ) - && $rOpts->{'brace-left-and-indent-list'} ) - { - $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; - } - - $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); -} - -sub make_block_brace_vertical_tightness_pattern { - - # turn any input list into a regex for recognizing selected block types - $block_brace_vertical_tightness_pattern = - '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; - - if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) - && $rOpts->{'block-brace-vertical-tightness-list'} ) - { - $block_brace_vertical_tightness_pattern = - make_block_pattern( '-bbvtl', - $rOpts->{'block-brace-vertical-tightness-list'} ); - } -} - -sub make_block_pattern { - - # given a string of block-type keywords, return a regex to match them - # The only tricky part is that labels are indicated with a single ':' - # and the 'sub' token text may have additional text after it (name of - # sub). - # - # Example: - # - # input string: "if else elsif unless while for foreach do : sub"; - # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; - - my ( $abbrev, $string ) = @_; - my @list = split_words($string); - my @words = (); - my %seen; - for my $i (@list) { - next if $seen{$i}; - $seen{$i} = 1; - if ( $i eq 'sub' ) { - } - elsif ( $i eq ':' ) { - push @words, '\w+:'; - } - elsif ( $i =~ /^\w/ ) { - push @words, $i; - } - else { - warn "unrecognized block type $i after $abbrev, ignoring\n"; - } - } - my $pattern = '(' . join( '|', @words ) . ')$'; - if ( $seen{'sub'} ) { - $pattern = '(' . $pattern . '|sub)'; - } - $pattern = '^' . $pattern; - return $pattern; -} - -sub make_static_side_comment_pattern { - - # create the pattern used to identify static side comments - $static_side_comment_pattern = '^##'; - - # allow the user to change it - if ( $rOpts->{'static-side-comment-prefix'} ) { - my $prefix = $rOpts->{'static-side-comment-prefix'}; - $prefix =~ s/^\s*//; - my $pattern = '^' . $prefix; - eval "'##'=~/$pattern/"; - if ($@) { - die -"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; - } - $static_side_comment_pattern = $pattern; - } -} - -sub make_closing_side_comment_prefix { - - # Be sure we have a valid closing side comment prefix - my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; - my $csc_prefix_pattern; - if ( !defined($csc_prefix) ) { - $csc_prefix = '## end'; - $csc_prefix_pattern = '^##\s+end'; - } - else { - my $test_csc_prefix = $csc_prefix; - if ( $test_csc_prefix !~ /^#/ ) { - $test_csc_prefix = '#' . $test_csc_prefix; - } - - # make a regex to recognize the prefix - my $test_csc_prefix_pattern = $test_csc_prefix; - - # escape any special characters - $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; - - $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; - - # allow exact number of intermediate spaces to vary - $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; - - # make sure we have a good pattern - # if we fail this we probably have an error in escaping - # characters. - eval "'##'=~/$test_csc_prefix_pattern/"; - if ($@) { - - # shouldn't happen..must have screwed up escaping, above - report_definite_bug(); - warn -"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; - - # just warn and keep going with defaults - warn "Please consider using a simpler -cscp prefix\n"; - warn "Using default -cscp instead; please check output\n"; - } - else { - $csc_prefix = $test_csc_prefix; - $csc_prefix_pattern = $test_csc_prefix_pattern; - } - } - $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; - $closing_side_comment_prefix_pattern = $csc_prefix_pattern; -} - -sub dump_want_left_space { - my $fh = shift; - local $" = "\n"; - print $fh <1; - # $a = $b - III; - # and even this: - # $a = - III; - || ( ( $tokenl eq '-' ) - && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) - - # '= -' should not become =- or you will get a warning - # about reversed -= - # || ($tokenr eq '-') - - # keep a space between a quote and a bareword to prevent the - # bareword from becomming a quote modifier. - || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) - - # keep a space between a token ending in '$' and any word; - # this caused trouble: "die @$ if $@" - || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) - && ( $tokenr =~ /^[a-zA-Z_]/ ) ) - - # perl is very fussy about spaces before << - || ( $tokenr =~ /^\<\' is excluded because it never gets space - # parentheses and brackets are excluded since they are handled specially - # curly braces are included but may be overridden by logic, such as - # newline logic. - - # NEW_TOKENS: create a whitespace rule here. This can be as - # simple as adding your new letter to @spaces_both_sides, for - # example. - - @_ = qw" L { ( [ "; - @is_opening_type{@_} = (1) x scalar(@_); - - @_ = qw" R } ) ] "; - @is_closing_type{@_} = (1) x scalar(@_); - - my @spaces_both_sides = qw" - + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= - .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ - &&= ||= //= <=> A k f w F n C Y U G v - "; - - my @spaces_left_side = qw" - t ! ~ m p { \ h pp mm Z j - "; - push( @spaces_left_side, '#' ); # avoids warning message - - my @spaces_right_side = qw" - ; } ) ] R J ++ -- **= - "; - push( @spaces_right_side, ',' ); # avoids warning message - @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); - @want_right_space{@spaces_both_sides} = - (1) x scalar(@spaces_both_sides); - @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); - @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); - @want_left_space{@spaces_right_side} = - (-1) x scalar(@spaces_right_side); - @want_right_space{@spaces_right_side} = - (1) x scalar(@spaces_right_side); - $want_left_space{'L'} = WS_NO; - $want_left_space{'->'} = WS_NO; - $want_right_space{'->'} = WS_NO; - $want_left_space{'**'} = WS_NO; - $want_right_space{'**'} = WS_NO; - - # hash type information must stay tightly bound - # as in : ${xxxx} - $binary_ws_rules{'i'}{'L'} = WS_NO; - $binary_ws_rules{'i'}{'{'} = WS_YES; - $binary_ws_rules{'k'}{'{'} = WS_YES; - $binary_ws_rules{'U'}{'{'} = WS_YES; - $binary_ws_rules{'i'}{'['} = WS_NO; - $binary_ws_rules{'R'}{'L'} = WS_NO; - $binary_ws_rules{'R'}{'{'} = WS_NO; - $binary_ws_rules{'t'}{'L'} = WS_NO; - $binary_ws_rules{'t'}{'{'} = WS_NO; - $binary_ws_rules{'}'}{'L'} = WS_NO; - $binary_ws_rules{'}'}{'{'} = WS_NO; - $binary_ws_rules{'$'}{'L'} = WS_NO; - $binary_ws_rules{'$'}{'{'} = WS_NO; - $binary_ws_rules{'@'}{'L'} = WS_NO; - $binary_ws_rules{'@'}{'{'} = WS_NO; - $binary_ws_rules{'='}{'L'} = WS_YES; - - # the following includes ') {' - # as in : if ( xxx ) { yyy } - $binary_ws_rules{']'}{'L'} = WS_NO; - $binary_ws_rules{']'}{'{'} = WS_NO; - $binary_ws_rules{')'}{'{'} = WS_YES; - $binary_ws_rules{')'}{'['} = WS_NO; - $binary_ws_rules{']'}{'['} = WS_NO; - $binary_ws_rules{']'}{'{'} = WS_NO; - $binary_ws_rules{'}'}{'['} = WS_NO; - $binary_ws_rules{'R'}{'['} = WS_NO; - - $binary_ws_rules{']'}{'++'} = WS_NO; - $binary_ws_rules{']'}{'--'} = WS_NO; - $binary_ws_rules{')'}{'++'} = WS_NO; - $binary_ws_rules{')'}{'--'} = WS_NO; - - $binary_ws_rules{'R'}{'++'} = WS_NO; - $binary_ws_rules{'R'}{'--'} = WS_NO; - - ######################################################## - # should no longer be necessary (see niek.pl) - ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label - ##$binary_ws_rules{'w'}{':'} = WS_NO; - ######################################################## - $binary_ws_rules{'i'}{'Q'} = WS_YES; - $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' - - # FIXME: we need to split 'i' into variables and functions - # and have no space for functions but space for variables. For now, - # I have a special patch in the special rules below - $binary_ws_rules{'i'}{'('} = WS_NO; - - $binary_ws_rules{'w'}{'('} = WS_NO; - $binary_ws_rules{'w'}{'{'} = WS_YES; - } - my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; - my ( $last_token, $last_type, $last_block_type, $token, $type, - $block_type ); - my (@white_space_flag); - my $j_tight_closing_paren = -1; - - if ( $max_index_to_go >= 0 ) { - $token = $tokens_to_go[$max_index_to_go]; - $type = $types_to_go[$max_index_to_go]; - $block_type = $block_type_to_go[$max_index_to_go]; - } - else { - $token = ' '; - $type = 'b'; - $block_type = ''; - } - - # loop over all tokens - my ( $j, $ws ); - - for ( $j = 0 ; $j <= $jmax ; $j++ ) { - - if ( $$rtoken_type[$j] eq 'b' ) { - $white_space_flag[$j] = WS_OPTIONAL; - next; - } - - # set a default value, to be changed as needed - $ws = undef; - $last_token = $token; - $last_type = $type; - $last_block_type = $block_type; - $token = $$rtokens[$j]; - $type = $$rtoken_type[$j]; - $block_type = $$rblock_type[$j]; - - #--------------------------------------------------------------- - # section 1: - # handle space on the inside of opening braces - #--------------------------------------------------------------- - - # /^[L\{\(\[]$/ - if ( $is_opening_type{$last_type} ) { - - $j_tight_closing_paren = -1; - - # let's keep empty matched braces together: () {} [] - # except for BLOCKS - if ( $token eq $matching_token{$last_token} ) { - if ($block_type) { - $ws = WS_YES; - } - else { - $ws = WS_NO; - } - } - else { - - # we're considering the right of an opening brace - # tightness = 0 means always pad inside with space - # tightness = 1 means pad inside if "complex" - # tightness = 2 means never pad inside with space - - my $tightness; - if ( $last_type eq '{' - && $last_token eq '{' - && $last_block_type ) - { - $tightness = $rOpts_block_brace_tightness; - } - else { $tightness = $tightness{$last_token} } - - if ( $tightness <= 0 ) { - $ws = WS_YES; - } - elsif ( $tightness > 1 ) { - $ws = WS_NO; - } - else { - - # Patch to count '-foo' as single token so that - # each of $a{-foo} and $a{foo} and $a{'foo'} do - # not get spaces with default formatting. - my $j_here = $j; - ++$j_here - if ( $token eq '-' - && $last_token eq '{' - && $$rtoken_type[ $j + 1 ] eq 'w' ); - - # $j_next is where a closing token should be if - # the container has a single token - my $j_next = - ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) - ? $j_here + 2 - : $j_here + 1; - my $tok_next = $$rtokens[$j_next]; - my $type_next = $$rtoken_type[$j_next]; - - # for tightness = 1, if there is just one token - # within the matching pair, we will keep it tight - if ( - $tok_next eq $matching_token{$last_token} - - # but watch out for this: [ [ ] (misc.t) - && $last_token ne $token - ) - { - - # remember where to put the space for the closing paren - $j_tight_closing_paren = $j_next; - $ws = WS_NO; - } - else { - $ws = WS_YES; - } - } - } - } # done with opening braces and brackets - my $ws_1 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # section 2: - # handle space on inside of closing brace pairs - #--------------------------------------------------------------- - - # /[\}\)\]R]/ - if ( $is_closing_type{$type} ) { - - if ( $j == $j_tight_closing_paren ) { - - $j_tight_closing_paren = -1; - $ws = WS_NO; - } - else { - - if ( !defined($ws) ) { - - my $tightness; - if ( $type eq '}' && $token eq '}' && $block_type ) { - $tightness = $rOpts_block_brace_tightness; - } - else { $tightness = $tightness{$token} } - - $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; - } - } - } - - my $ws_2 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # section 3: - # use the binary table - #--------------------------------------------------------------- - if ( !defined($ws) ) { - $ws = $binary_ws_rules{$last_type}{$type}; - } - my $ws_3 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # section 4: - # some special cases - #--------------------------------------------------------------- - if ( $token eq '(' ) { - - # This will have to be tweaked as tokenization changes. - # We usually want a space at '} (', for example: - # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); - # - # But not others: - # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); - # At present, the above & block is marked as type L/R so this case - # won't go through here. - if ( $last_type eq '}' ) { $ws = WS_YES } - - # NOTE: some older versions of Perl had occasional problems if - # spaces are introduced between keywords or functions and opening - # parens. So the default is not to do this except is certain - # cases. The current Perl seems to tolerate spaces. - - # Space between keyword and '(' - elsif ( $last_type eq 'k' ) { - $ws = WS_NO - unless ( $rOpts_space_keyword_paren - || $space_after_keyword{$last_token} ); - } - - # Space between function and '(' - # ----------------------------------------------------- - # 'w' and 'i' checks for something like: - # myfun( &myfun( ->myfun( - # ----------------------------------------------------- - elsif (( $last_type =~ /^[wU]$/ ) - || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) - { - $ws = WS_NO unless ($rOpts_space_function_paren); - } - - # space between something like $i and ( in - # for $i ( 0 .. 20 ) { - # FIXME: eventually, type 'i' needs to be split into multiple - # token types so this can be a hardwired rule. - elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { - $ws = WS_YES; - } - - # allow constant function followed by '()' to retain no space - elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { - $ws = WS_NO; - } - } - - # patch for SWITCH/CASE: make space at ']{' optional - # since the '{' might begin a case or when block - elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { - $ws = WS_OPTIONAL; - } - - # keep space between 'sub' and '{' for anonymous sub definition - if ( $type eq '{' ) { - if ( $last_token eq 'sub' ) { - $ws = WS_YES; - } - - # this is needed to avoid no space in '){' - if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } - - # avoid any space before the brace or bracket in something like - # @opts{'a','b',...} - if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { - $ws = WS_NO; - } - } - - elsif ( $type eq 'i' ) { - - # never a space before -> - if ( $token =~ /^\-\>/ ) { - $ws = WS_NO; - } - } - - # retain any space between '-' and bare word - elsif ( $type eq 'w' || $type eq 'C' ) { - $ws = WS_OPTIONAL if $last_type eq '-'; - - # never a space before -> - if ( $token =~ /^\-\>/ ) { - $ws = WS_NO; - } - } - - # retain any space between '-' and bare word - # example: avoid space between 'USER' and '-' here: - # $myhash{USER-NAME}='steve'; - elsif ( $type eq 'm' || $type eq '-' ) { - $ws = WS_OPTIONAL if ( $last_type eq 'w' ); - } - - # always space before side comment - elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } - - # always preserver whatever space was used after a possible - # filehandle (except _) or here doc operator - if ( - $type ne '#' - && ( ( $last_type eq 'Z' && $last_token ne '_' ) - || $last_type eq 'h' ) - ) - { - $ws = WS_OPTIONAL; - } - - my $ws_4 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # section 5: - # default rules not covered above - #--------------------------------------------------------------- - # if we fall through to here, - # look at the pre-defined hash tables for the two tokens, and - # if (they are equal) use the common value - # if (either is zero or undef) use the other - # if (either is -1) use it - # That is, - # left vs right - # 1 vs 1 --> 1 - # 0 vs 0 --> 0 - # -1 vs -1 --> -1 - # - # 0 vs -1 --> -1 - # 0 vs 1 --> 1 - # 1 vs 0 --> 1 - # -1 vs 0 --> -1 - # - # -1 vs 1 --> -1 - # 1 vs -1 --> -1 - if ( !defined($ws) ) { - my $wl = $want_left_space{$type}; - my $wr = $want_right_space{$last_type}; - if ( !defined($wl) ) { $wl = 0 } - if ( !defined($wr) ) { $wr = 0 } - $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; - } - - if ( !defined($ws) ) { - $ws = 0; - write_diagnostics( - "WS flag is undefined for tokens $last_token $token\n"); - } - - # Treat newline as a whitespace. Otherwise, we might combine - # 'Send' and '-recipients' here according to the above rules: - # my $msg = new Fax::Send - # -recipients => $to, - # -data => $data; - if ( $ws == 0 && $j == 0 ) { $ws = 1 } - - if ( ( $ws == 0 ) - && $j > 0 - && $j < $jmax - && ( $last_type !~ /^[Zh]$/ ) ) - { - - # If this happens, we have a non-fatal but undesirable - # hole in the above rules which should be patched. - write_diagnostics( - "WS flag is zero for tokens $last_token $token\n"); - } - $white_space_flag[$j] = $ws; - - FORMATTER_DEBUG_FLAG_WHITE && do { - my $str = substr( $last_token, 0, 15 ); - $str .= ' ' x ( 16 - length($str) ); - if ( !defined($ws_1) ) { $ws_1 = "*" } - if ( !defined($ws_2) ) { $ws_2 = "*" } - if ( !defined($ws_3) ) { $ws_3 = "*" } - if ( !defined($ws_4) ) { $ws_4 = "*" } - print -"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; - }; - } - return \@white_space_flag; -} - -{ # begin print_line_of_tokens - - my $rtoken_type; - my $rtokens; - my $rlevels; - my $rslevels; - my $rblock_type; - my $rcontainer_type; - my $rcontainer_environment; - my $rtype_sequence; - my $input_line; - my $rnesting_tokens; - my $rci_levels; - my $rnesting_blocks; - - my $in_quote; - my $python_indentation_level; - - # These local token variables are stored by store_token_to_go: - my $block_type; - my $ci_level; - my $container_environment; - my $container_type; - my $in_continued_quote; - my $level; - my $nesting_blocks; - my $no_internal_newlines; - my $slevel; - my $token; - my $type; - my $type_sequence; - - # routine to pull the jth token from the line of tokens - sub extract_token { - my $j = shift; - $token = $$rtokens[$j]; - $type = $$rtoken_type[$j]; - $block_type = $$rblock_type[$j]; - $container_type = $$rcontainer_type[$j]; - $container_environment = $$rcontainer_environment[$j]; - $type_sequence = $$rtype_sequence[$j]; - $level = $$rlevels[$j]; - $slevel = $$rslevels[$j]; - $nesting_blocks = $$rnesting_blocks[$j]; - $ci_level = $$rci_levels[$j]; - } - - { - my @saved_token; - - sub save_current_token { - - @saved_token = ( - $block_type, $ci_level, - $container_environment, $container_type, - $in_continued_quote, $level, - $nesting_blocks, $no_internal_newlines, - $slevel, $token, - $type, $type_sequence, - ); - } - - sub restore_current_token { - ( - $block_type, $ci_level, - $container_environment, $container_type, - $in_continued_quote, $level, - $nesting_blocks, $no_internal_newlines, - $slevel, $token, - $type, $type_sequence, - ) = @saved_token; - } - } - - # Routine to place the current token into the output stream. - # Called once per output token. - sub store_token_to_go { - - my $flag = $no_internal_newlines; - if ( $_[0] ) { $flag = 1 } - - $tokens_to_go[ ++$max_index_to_go ] = $token; - $types_to_go[$max_index_to_go] = $type; - $nobreak_to_go[$max_index_to_go] = $flag; - $old_breakpoint_to_go[$max_index_to_go] = 0; - $forced_breakpoint_to_go[$max_index_to_go] = 0; - $block_type_to_go[$max_index_to_go] = $block_type; - $type_sequence_to_go[$max_index_to_go] = $type_sequence; - $container_environment_to_go[$max_index_to_go] = $container_environment; - $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; - $ci_levels_to_go[$max_index_to_go] = $ci_level; - $mate_index_to_go[$max_index_to_go] = -1; - $matching_token_to_go[$max_index_to_go] = ''; - $bond_strength_to_go[$max_index_to_go] = 0; - - # Note: negative levels are currently retained as a diagnostic so that - # the 'final indentation level' is correctly reported for bad scripts. - # But this means that every use of $level as an index must be checked. - # If this becomes too much of a problem, we might give up and just clip - # them at zero. - ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; - $levels_to_go[$max_index_to_go] = $level; - $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; - $lengths_to_go[ $max_index_to_go + 1 ] = - $lengths_to_go[$max_index_to_go] + length($token); - - # Define the indentation that this token would have if it started - # a new line. We have to do this now because we need to know this - # when considering one-line blocks. - set_leading_whitespace( $level, $ci_level, $in_continued_quote ); - - if ( $type ne 'b' ) { - $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; - $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; - $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; - $last_nonblank_index_to_go = $max_index_to_go; - $last_nonblank_type_to_go = $type; - $last_nonblank_token_to_go = $token; - if ( $type eq ',' ) { - $comma_count_in_batch++; - } - } - - FORMATTER_DEBUG_FLAG_STORE && do { - my ( $a, $b, $c ) = caller(); - print -"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; - }; - } - - sub insert_new_token_to_go { - - # insert a new token into the output stream. use same level as - # previous token; assumes a character at max_index_to_go. - save_current_token(); - ( $token, $type, $slevel, $no_internal_newlines ) = @_; - - if ( $max_index_to_go == UNDEFINED_INDEX ) { - warning("code bug: bad call to insert_new_token_to_go\n"); - } - $level = $levels_to_go[$max_index_to_go]; - - # FIXME: it seems to be necessary to use the next, rather than - # previous, value of this variable when creating a new blank (align.t) - #my $slevel = $nesting_depth_to_go[$max_index_to_go]; - $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; - $ci_level = $ci_levels_to_go[$max_index_to_go]; - $container_environment = $container_environment_to_go[$max_index_to_go]; - $in_continued_quote = 0; - $block_type = ""; - $type_sequence = ""; - store_token_to_go(); - restore_current_token(); - return; - } - - sub print_line_of_tokens { - - my $line_of_tokens = shift; - - # This routine is called once per input line to process all of - # the tokens on that line. This is the first stage of - # beautification. - # - # Full-line comments and blank lines may be processed immediately. - # - # For normal lines of code, the tokens are stored one-by-one, - # via calls to 'sub store_token_to_go', until a known line break - # point is reached. Then, the batch of collected tokens is - # passed along to 'sub output_line_to_go' for further - # processing. This routine decides if there should be - # whitespace between each pair of non-white tokens, so later - # routines only need to decide on any additional line breaks. - # Any whitespace is initally a single space character. Later, - # the vertical aligner may expand that to be multiple space - # characters if necessary for alignment. - - # extract input line number for error messages - $input_line_number = $line_of_tokens->{_line_number}; - - $rtoken_type = $line_of_tokens->{_rtoken_type}; - $rtokens = $line_of_tokens->{_rtokens}; - $rlevels = $line_of_tokens->{_rlevels}; - $rslevels = $line_of_tokens->{_rslevels}; - $rblock_type = $line_of_tokens->{_rblock_type}; - $rcontainer_type = $line_of_tokens->{_rcontainer_type}; - $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; - $rtype_sequence = $line_of_tokens->{_rtype_sequence}; - $input_line = $line_of_tokens->{_line_text}; - $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; - $rci_levels = $line_of_tokens->{_rci_levels}; - $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; - - $in_continued_quote = $starting_in_quote = - $line_of_tokens->{_starting_in_quote}; - $in_quote = $line_of_tokens->{_ending_in_quote}; - $ending_in_quote = $in_quote; - $python_indentation_level = - $line_of_tokens->{_python_indentation_level}; - - my $j; - my $j_next; - my $jmax; - my $next_nonblank_token; - my $next_nonblank_token_type; - my $rwhite_space_flag; - - $jmax = @$rtokens - 1; - $block_type = ""; - $container_type = ""; - $container_environment = ""; - $type_sequence = ""; - $no_internal_newlines = 1 - $rOpts_add_newlines; - $is_static_block_comment = 0; - - # Handle a continued quote.. - if ($in_continued_quote) { - - # A line which is entirely a quote or pattern must go out - # verbatim. Note: the \n is contained in $input_line. - if ( $jmax <= 0 ) { - if ( ( $input_line =~ "\t" ) ) { - note_embedded_tab(); - } - write_unindented_line("$input_line"); - $last_line_had_side_comment = 0; - return; - } - - # prior to version 20010406, perltidy had a bug which placed - # continuation indentation before the last line of some multiline - # quotes and patterns -- exactly the lines passing this way. - # To help find affected lines in scripts run with these - # versions, run with '-chk', and it will warn of any quotes or - # patterns which might have been modified by these early - # versions. - if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { - warning( -"-chk: please check this line for extra leading whitespace\n" - ); - } - } - - # Write line verbatim if we are in a formatting skip section - if ($in_format_skipping_section) { - write_unindented_line("$input_line"); - $last_line_had_side_comment = 0; - - # Note: extra space appended to comment simplifies pattern matching - if ( $jmax == 0 - && $$rtoken_type[0] eq '#' - && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) - { - $in_format_skipping_section = 0; - write_logfile_entry("Exiting formatting skip section\n"); - } - return; - } - - # See if we are entering a formatting skip section - if ( $rOpts_format_skipping - && $jmax == 0 - && $$rtoken_type[0] eq '#' - && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) - { - flush(); - $in_format_skipping_section = 1; - write_logfile_entry("Entering formatting skip section\n"); - write_unindented_line("$input_line"); - $last_line_had_side_comment = 0; - return; - } - - # delete trailing blank tokens - if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } - - # Handle a blank line.. - if ( $jmax < 0 ) { - - # For the 'swallow-optional-blank-lines' option, we delete all - # old blank lines and let the blank line rules generate any - # needed blanks. - if ( !$rOpts_swallow_optional_blank_lines ) { - flush(); - $file_writer_object->write_blank_code_line(); - $last_line_leading_type = 'b'; - } - $last_line_had_side_comment = 0; - return; - } - - # see if this is a static block comment (starts with ## by default) - my $is_static_block_comment_without_leading_space = 0; - if ( $jmax == 0 - && $$rtoken_type[0] eq '#' - && $rOpts->{'static-block-comments'} - && $input_line =~ /$static_block_comment_pattern/o ) - { - $is_static_block_comment = 1; - $is_static_block_comment_without_leading_space = - substr( $input_line, 0, 1 ) eq '#'; - } - - # Check for comments which are line directives - # Treat exactly as static block comments without leading space - # reference: perlsyn, near end, section Plain Old Comments (Not!) - # example: '# line 42 "new_filename.plx"' - if ( - $jmax == 0 - && $$rtoken_type[0] eq '#' - && $input_line =~ /^\# \s* - line \s+ (\d+) \s* - (?:\s("?)([^"]+)\2)? \s* - $/x - ) - { - $is_static_block_comment = 1; - $is_static_block_comment_without_leading_space = 1; - } - - # create a hanging side comment if appropriate - if ( - $jmax == 0 - && $$rtoken_type[0] eq '#' # only token is a comment - && $last_line_had_side_comment # last line had side comment - && $input_line =~ /^\s/ # there is some leading space - && !$is_static_block_comment # do not make static comment hanging - && $rOpts->{'hanging-side-comments'} # user is allowing this - ) - { - - # We will insert an empty qw string at the start of the token list - # to force this comment to be a side comment. The vertical aligner - # should then line it up with the previous side comment. - unshift @$rtoken_type, 'q'; - unshift @$rtokens, ''; - unshift @$rlevels, $$rlevels[0]; - unshift @$rslevels, $$rslevels[0]; - unshift @$rblock_type, ''; - unshift @$rcontainer_type, ''; - unshift @$rcontainer_environment, ''; - unshift @$rtype_sequence, ''; - unshift @$rnesting_tokens, $$rnesting_tokens[0]; - unshift @$rci_levels, $$rci_levels[0]; - unshift @$rnesting_blocks, $$rnesting_blocks[0]; - $jmax = 1; - } - - # remember if this line has a side comment - $last_line_had_side_comment = - ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); - - # Handle a block (full-line) comment.. - if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { - - if ( $rOpts->{'delete-block-comments'} ) { return } - - if ( $rOpts->{'tee-block-comments'} ) { - $file_writer_object->tee_on(); - } - - destroy_one_line_block(); - output_line_to_go(); - - # output a blank line before block comments - if ( - $last_line_leading_type !~ /^[#b]$/ - && $rOpts->{'blanks-before-comments'} # only if allowed - && ! - $is_static_block_comment # never before static block comments - ) - { - flush(); # switching to new output stream - $file_writer_object->write_blank_code_line(); - $last_line_leading_type = 'b'; - } - - # TRIM COMMENTS -- This could be turned off as a option - $$rtokens[0] =~ s/\s*$//; # trim right end - - if ( - $rOpts->{'indent-block-comments'} - && ( !$rOpts->{'indent-spaced-block-comments'} - || $input_line =~ /^\s+/ ) - && !$is_static_block_comment_without_leading_space - ) - { - extract_token(0); - store_token_to_go(); - output_line_to_go(); - } - else { - flush(); # switching to new output stream - $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); - $last_line_leading_type = '#'; - } - if ( $rOpts->{'tee-block-comments'} ) { - $file_writer_object->tee_off(); - } - return; - } - - # compare input/output indentation except for continuation lines - # (because they have an unknown amount of initial blank space) - # and lines which are quotes (because they may have been outdented) - # Note: this test is placed here because we know the continuation flag - # at this point, which allows us to avoid non-meaningful checks. - my $structural_indentation_level = $$rlevels[0]; - compare_indentation_levels( $python_indentation_level, - $structural_indentation_level ) - unless ( $python_indentation_level < 0 - || ( $$rci_levels[0] > 0 ) - || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) - ); - - # Patch needed for MakeMaker. Do not break a statement - # in which $VERSION may be calculated. See MakeMaker.pm; - # this is based on the coding in it. - # The first line of a file that matches this will be eval'd: - # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ - # Examples: - # *VERSION = \'1.01'; - # ( $VERSION ) = '$Revision: 1.73 $ ' =~ /\$Revision:\s+([^\s]+)/; - # We will pass such a line straight through without breaking - # it unless -npvl is used - - my $is_VERSION_statement = 0; - - if ( - !$saw_VERSION_in_this_file - && $input_line =~ /VERSION/ # quick check to reject most lines - && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ - ) - { - $saw_VERSION_in_this_file = 1; - $is_VERSION_statement = 1; - write_logfile_entry("passing VERSION line; -npvl deactivates\n"); - $no_internal_newlines = 1; - } - - # take care of indentation-only - # NOTE: In previous versions we sent all qw lines out immediately here. - # No longer doing this: also write a line which is entirely a 'qw' list - # to allow stacking of opening and closing tokens. Note that interior - # qw lines will still go out at the end of this routine. - if ( $rOpts->{'indent-only'} ) { - flush(); - trim($input_line); - - extract_token(0); - $token = $input_line; - $type = 'q'; - $block_type = ""; - $container_type = ""; - $container_environment = ""; - $type_sequence = ""; - store_token_to_go(); - output_line_to_go(); - return; - } - - push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding - push( @$rtoken_type, 'b', 'b' ); - ($rwhite_space_flag) = - set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); - - # find input tabbing to allow checks for tabbing disagreement - ## not used for now - ##$input_line_tabbing = ""; - ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } - - # if the buffer hasn't been flushed, add a leading space if - # necessary to keep essential whitespace. This is really only - # necessary if we are squeezing out all ws. - if ( $max_index_to_go >= 0 ) { - - $old_line_count_in_batch++; - - if ( - is_essential_whitespace( - $last_last_nonblank_token, - $last_last_nonblank_type, - $tokens_to_go[$max_index_to_go], - $types_to_go[$max_index_to_go], - $$rtokens[0], - $$rtoken_type[0] - ) - ) - { - my $slevel = $$rslevels[0]; - insert_new_token_to_go( ' ', 'b', $slevel, - $no_internal_newlines ); - } - } - - # If we just saw the end of an elsif block, write nag message - # if we do not see another elseif or an else. - if ($looking_for_else) { - - unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { - write_logfile_entry("(No else block)\n"); - } - $looking_for_else = 0; - } - - # This is a good place to kill incomplete one-line blocks - if ( ( $semicolons_before_block_self_destruct == 0 ) - && ( $max_index_to_go >= 0 ) - && ( $types_to_go[$max_index_to_go] eq ';' ) - && ( $$rtokens[0] ne '}' ) ) - { - destroy_one_line_block(); - output_line_to_go(); - } - - # loop to process the tokens one-by-one - $type = 'b'; - $token = ""; - - foreach $j ( 0 .. $jmax ) { - - # pull out the local values for this token - extract_token($j); - - if ( $type eq '#' ) { - - # trim trailing whitespace - # (there is no option at present to prevent this) - $token =~ s/\s*$//; - - if ( - $rOpts->{'delete-side-comments'} - - # delete closing side comments if necessary - || ( $rOpts->{'delete-closing-side-comments'} - && $token =~ /$closing_side_comment_prefix_pattern/o - && $last_nonblank_block_type =~ - /$closing_side_comment_list_pattern/o ) - ) - { - if ( $types_to_go[$max_index_to_go] eq 'b' ) { - unstore_token_to_go(); - } - last; - } - } - - # If we are continuing after seeing a right curly brace, flush - # buffer unless we see what we are looking for, as in - # } else ... - if ( $rbrace_follower && $type ne 'b' ) { - - unless ( $rbrace_follower->{$token} ) { - output_line_to_go(); - } - $rbrace_follower = undef; - } - - $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; - $next_nonblank_token = $$rtokens[$j_next]; - $next_nonblank_token_type = $$rtoken_type[$j_next]; - - #-------------------------------------------------------- - # Start of section to patch token text - #-------------------------------------------------------- - - # Modify certain tokens here for whitespace - # The following is not yet done, but could be: - # sub (x x x) - if ( $type =~ /^[wit]$/ ) { - - # Examples: - # change '$ var' to '$var' etc - # '-> new' to '->new' - if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { - $token =~ s/\s*//g; - } - - if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } - } - - # change 'LABEL :' to 'LABEL:' - elsif ( $type eq 'J' ) { $token =~ s/\s+//g } - - # patch to add space to something like "x10" - # This avoids having to split this token in the pre-tokenizer - elsif ( $type eq 'n' ) { - if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } - } - - elsif ( $type eq 'Q' ) { - note_embedded_tab() if ( $token =~ "\t" ); - - # make note of something like '$var = s/xxx/yyy/;' - # in case it should have been '$var =~ s/xxx/yyy/;' - if ( - $token =~ /^(s|tr|y|m|\/)/ - && $last_nonblank_token =~ /^(=|==|!=)$/ - - # precededed by simple scalar - && $last_last_nonblank_type eq 'i' - && $last_last_nonblank_token =~ /^\$/ - - # followed by some kind of termination - # (but give complaint if we can's see far enough ahead) - && $next_nonblank_token =~ /^[; \)\}]$/ - - # scalar is not decleared - && !( - $types_to_go[0] eq 'k' - && $tokens_to_go[0] =~ /^(my|our|local)$/ - ) - ) - { - my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; - complain( -"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" - ); - } - } - - # trim blanks from right of qw quotes - # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) - elsif ( $type eq 'q' ) { - $token =~ s/\s*$//; - note_embedded_tab() if ( $token =~ "\t" ); - } - - #-------------------------------------------------------- - # End of section to patch token text - #-------------------------------------------------------- - - # insert any needed whitespace - if ( ( $type ne 'b' ) - && ( $max_index_to_go >= 0 ) - && ( $types_to_go[$max_index_to_go] ne 'b' ) - && $rOpts_add_whitespace ) - { - my $ws = $$rwhite_space_flag[$j]; - - if ( $ws == 1 ) { - insert_new_token_to_go( ' ', 'b', $slevel, - $no_internal_newlines ); - } - } - - # Do not allow breaks which would promote a side comment to a - # block comment. In order to allow a break before an opening - # or closing BLOCK, followed by a side comment, those sections - # of code will handle this flag separately. - my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); - my $is_opening_BLOCK = - ( $type eq '{' - && $token eq '{' - && $block_type - && $block_type ne 't' ); - my $is_closing_BLOCK = - ( $type eq '}' - && $token eq '}' - && $block_type - && $block_type ne 't' ); - - if ( $side_comment_follows - && !$is_opening_BLOCK - && !$is_closing_BLOCK ) - { - $no_internal_newlines = 1; - } - - # We're only going to handle breaking for code BLOCKS at this - # (top) level. Other indentation breaks will be handled by - # sub scan_list, which is better suited to dealing with them. - if ($is_opening_BLOCK) { - - # Tentatively output this token. This is required before - # calling starting_one_line_block. We may have to unstore - # it, though, if we have to break before it. - store_token_to_go($side_comment_follows); - - # Look ahead to see if we might form a one-line block - my $too_long = - starting_one_line_block( $j, $jmax, $level, $slevel, - $ci_level, $rtokens, $rtoken_type, $rblock_type ); - clear_breakpoint_undo_stack(); - - # to simplify the logic below, set a flag to indicate if - # this opening brace is far from the keyword which introduces it - my $keyword_on_same_line = 1; - if ( ( $max_index_to_go >= 0 ) - && ( $last_nonblank_type eq ')' ) ) - { - if ( $block_type =~ /^(if|else|elsif)$/ - && ( $tokens_to_go[0] eq '}' ) - && $rOpts_cuddled_else ) - { - $keyword_on_same_line = 1; - } - elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) - { - $keyword_on_same_line = 0; - } - } - - # decide if user requested break before '{' - my $want_break = - - # use -bl flag if not a sub block of any type - $block_type !~ /^sub/ - ? $rOpts->{'opening-brace-on-new-line'} - - # use -sbl flag unless this is an anonymous sub block - : $block_type !~ /^sub\W*$/ - ? $rOpts->{'opening-sub-brace-on-new-line'} - - # do not break for anonymous subs - : 0; - - # Break before an opening '{' ... - if ( - - # if requested - $want_break - - # and we were unable to start looking for a block, - && $index_start_one_line_block == UNDEFINED_INDEX - - # or if it will not be on same line as its keyword, so that - # it will be outdented (eval.t, overload.t), and the user - # has not insisted on keeping it on the right - || ( !$keyword_on_same_line - && !$rOpts->{'opening-brace-always-on-right'} ) - - ) - { - - # but only if allowed - unless ($no_internal_newlines) { - - # since we already stored this token, we must unstore it - unstore_token_to_go(); - - # then output the line - output_line_to_go(); - - # and now store this token at the start of a new line - store_token_to_go($side_comment_follows); - } - } - - # Now update for side comment - if ($side_comment_follows) { $no_internal_newlines = 1 } - - # now output this line - unless ($no_internal_newlines) { - output_line_to_go(); - } - } - - elsif ($is_closing_BLOCK) { - - # If there is a pending one-line block .. - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - - # we have to terminate it if.. - if ( - - # it is too long (final length may be different from - # initial estimate). note: must allow 1 space for this token - excess_line_length( $index_start_one_line_block, - $max_index_to_go ) >= 0 - - # or if it has too many semicolons - || ( $semicolons_before_block_self_destruct == 0 - && $last_nonblank_type ne ';' ) - ) - { - destroy_one_line_block(); - } - } - - # put a break before this closing curly brace if appropriate - unless ( $no_internal_newlines - || $index_start_one_line_block != UNDEFINED_INDEX ) - { - - # add missing semicolon if ... - # there are some tokens - if ( - ( $max_index_to_go > 0 ) - - # and we don't have one - && ( $last_nonblank_type ne ';' ) - - # patch until some block type issues are fixed: - # Do not add semi-colon for block types '{', - # '}', and ';' because we cannot be sure yet - # that this is a block and not an anonomyous - # hash (blktype.t, blktype1.t) - && ( $block_type !~ /^[\{\};]$/ ) - - # it seems best not to add semicolons in these - # special block types: sort|map|grep - && ( !$is_sort_map_grep{$block_type} ) - - # and we are allowed to do so. - && $rOpts->{'add-semicolons'} - ) - { - - save_current_token(); - $token = ';'; - $type = ';'; - $level = $levels_to_go[$max_index_to_go]; - $slevel = $nesting_depth_to_go[$max_index_to_go]; - $nesting_blocks = - $nesting_blocks_to_go[$max_index_to_go]; - $ci_level = $ci_levels_to_go[$max_index_to_go]; - $block_type = ""; - $container_type = ""; - $container_environment = ""; - $type_sequence = ""; - - # Note - we remove any blank AFTER extracting its - # parameters such as level, etc, above - if ( $types_to_go[$max_index_to_go] eq 'b' ) { - unstore_token_to_go(); - } - store_token_to_go(); - - note_added_semicolon(); - restore_current_token(); - } - - # then write out everything before this closing curly brace - output_line_to_go(); - - } - - # Now update for side comment - if ($side_comment_follows) { $no_internal_newlines = 1 } - - # store the closing curly brace - store_token_to_go(); - - # ok, we just stored a closing curly brace. Often, but - # not always, we want to end the line immediately. - # So now we have to check for special cases. - - # if this '}' successfully ends a one-line block.. - my $is_one_line_block = 0; - my $keep_going = 0; - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - - # Remember the type of token just before the - # opening brace. It would be more general to use - # a stack, but this will work for one-line blocks. - $is_one_line_block = - $types_to_go[$index_start_one_line_block]; - - # we have to actually make it by removing tentative - # breaks that were set within it - undo_forced_breakpoint_stack(0); - set_nobreaks( $index_start_one_line_block, - $max_index_to_go - 1 ); - - # then re-initialize for the next one-line block - destroy_one_line_block(); - - # then decide if we want to break after the '}' .. - # We will keep going to allow certain brace followers as in: - # do { $ifclosed = 1; last } unless $losing; - # - # But make a line break if the curly ends a - # significant block: - if ( - $is_block_without_semicolon{$block_type} - - # if needless semicolon follows we handle it later - && $next_nonblank_token ne ';' - ) - { - output_line_to_go() unless ($no_internal_newlines); - } - } - - # set string indicating what we need to look for brace follower - # tokens - if ( $block_type eq 'do' ) { - $rbrace_follower = \%is_do_follower; - } - elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { - $rbrace_follower = \%is_if_brace_follower; - } - elsif ( $block_type eq 'else' ) { - $rbrace_follower = \%is_else_brace_follower; - } - - # added eval for borris.t - elsif ($is_sort_map_grep_eval{$block_type} - || $is_one_line_block eq 'G' ) - { - $rbrace_follower = undef; - $keep_going = 1; - } - - # anonymous sub - elsif ( $block_type =~ /^sub\W*$/ ) { - - if ($is_one_line_block) { - $rbrace_follower = \%is_anon_sub_1_brace_follower; - } - else { - $rbrace_follower = \%is_anon_sub_brace_follower; - } - } - - # None of the above: specify what can follow a closing - # brace of a block which is not an - # if/elsif/else/do/sort/map/grep/eval - # Testfiles: - # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t - else { - $rbrace_follower = \%is_other_brace_follower; - } - - # See if an elsif block is followed by another elsif or else; - # complain if not. - if ( $block_type eq 'elsif' ) { - - if ( $next_nonblank_token_type eq 'b' ) { # end of line? - $looking_for_else = 1; # ok, check on next line - } - else { - - unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { - write_logfile_entry("No else block :(\n"); - } - } - } - - # keep going after certain block types (map,sort,grep,eval) - # added eval for borris.t - if ($keep_going) { - - # keep going - } - - # if no more tokens, postpone decision until re-entring - elsif ( ( $next_nonblank_token_type eq 'b' ) - && $rOpts_add_newlines ) - { - unless ($rbrace_follower) { - output_line_to_go() unless ($no_internal_newlines); - } - } - - elsif ($rbrace_follower) { - - unless ( $rbrace_follower->{$next_nonblank_token} ) { - output_line_to_go() unless ($no_internal_newlines); - } - $rbrace_follower = undef; - } - - else { - output_line_to_go() unless ($no_internal_newlines); - } - - } # end treatment of closing block token - - # handle semicolon - elsif ( $type eq ';' ) { - - # kill one-line blocks with too many semicolons - $semicolons_before_block_self_destruct--; - if ( - ( $semicolons_before_block_self_destruct < 0 ) - || ( $semicolons_before_block_self_destruct == 0 - && $next_nonblank_token_type !~ /^[b\}]$/ ) - ) - { - destroy_one_line_block(); - } - - # Remove unnecessary semicolons, but not after bare - # blocks, where it could be unsafe if the brace is - # mistokenized. - if ( - ( - $last_nonblank_token eq '}' - && ( - $is_block_without_semicolon{ - $last_nonblank_block_type} - || $last_nonblank_block_type =~ /^sub\s+\w/ - || $last_nonblank_block_type =~ /^\w+:$/ ) - ) - || $last_nonblank_type eq ';' - ) - { - - if ( - $rOpts->{'delete-semicolons'} - - # don't delete ; before a # because it would promote it - # to a block comment - && ( $next_nonblank_token_type ne '#' ) - ) - { - note_deleted_semicolon(); - output_line_to_go() - unless ( $no_internal_newlines - || $index_start_one_line_block != UNDEFINED_INDEX ); - next; - } - else { - write_logfile_entry("Extra ';'\n"); - } - } - store_token_to_go(); - - output_line_to_go() - unless ( $no_internal_newlines - || ( $rOpts_keep_interior_semicolons && $j < $jmax ) - || ( $next_nonblank_token eq '}' ) ); - - } - - # handle here_doc target string - elsif ( $type eq 'h' ) { - $no_internal_newlines = - 1; # no newlines after seeing here-target - destroy_one_line_block(); - store_token_to_go(); - } - - # handle all other token types - else { - - # if this is a blank... - if ( $type eq 'b' ) { - - # make it just one character - $token = ' ' if $rOpts_add_whitespace; - - # delete it if unwanted by whitespace rules - # or we are deleting all whitespace - my $ws = $$rwhite_space_flag[ $j + 1 ]; - if ( ( defined($ws) && $ws == -1 ) - || $rOpts_delete_old_whitespace ) - { - - # unless it might make a syntax error - next - unless is_essential_whitespace( - $last_last_nonblank_token, - $last_last_nonblank_type, - $tokens_to_go[$max_index_to_go], - $types_to_go[$max_index_to_go], - $$rtokens[ $j + 1 ], - $$rtoken_type[ $j + 1 ] - ); - } - } - store_token_to_go(); - } - - # remember two previous nonblank OUTPUT tokens - if ( $type ne '#' && $type ne 'b' ) { - $last_last_nonblank_token = $last_nonblank_token; - $last_last_nonblank_type = $last_nonblank_type; - $last_nonblank_token = $token; - $last_nonblank_type = $type; - $last_nonblank_block_type = $block_type; - } - - # unset the continued-quote flag since it only applies to the - # first token, and we want to resume normal formatting if - # there are additional tokens on the line - $in_continued_quote = 0; - - } # end of loop over all tokens in this 'line_of_tokens' - - # we have to flush .. - if ( - - # if there is a side comment - ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) - - # if this line ends in a quote - # NOTE: This is critically important for insuring that quoted lines - # do not get processed by things like -sot and -sct - || $in_quote - - # if this is a VERSION statement - || $is_VERSION_statement - - # to keep a label on one line if that is how it is now - || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) ) - - # if we are instructed to keep all old line breaks - || !$rOpts->{'delete-old-newlines'} - ) - { - destroy_one_line_block(); - output_line_to_go(); - } - - # mark old line breakpoints in current output stream - if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { - $old_breakpoint_to_go[$max_index_to_go] = 1; - } - } # end sub print_line_of_tokens -} # end print_line_of_tokens - -# sub output_line_to_go sends one logical line of tokens on down the -# pipeline to the VerticalAligner package, breaking the line into continuation -# lines as necessary. The line of tokens is ready to go in the "to_go" -# arrays. -sub output_line_to_go { - - # debug stuff; this routine can be called from many points - FORMATTER_DEBUG_FLAG_OUTPUT && do { - my ( $a, $b, $c ) = caller; - write_diagnostics( -"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" - ); - my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; - write_diagnostics("$output_str\n"); - }; - - # just set a tentative breakpoint if we might be in a one-line block - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - set_forced_breakpoint($max_index_to_go); - return; - } - - my $cscw_block_comment; - $cscw_block_comment = add_closing_side_comment() - if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); - - match_opening_and_closing_tokens(); - - # tell the -lp option we are outputting a batch so it can close - # any unfinished items in its stack - finish_lp_batch(); - - # If this line ends in a code block brace, set breaks at any - # previous closing code block braces to breakup a chain of code - # blocks on one line. This is very rare but can happen for - # user-defined subs. For example we might be looking at this: - # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { - my $saw_good_break = 0; # flag to force breaks even if short line - if ( - - # looking for opening or closing block brace - $block_type_to_go[$max_index_to_go] - - # but not one of these which are never duplicated on a line: - # until|while|for|if|elsif|else - && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } - ) - { - my $lev = $nesting_depth_to_go[$max_index_to_go]; - - # Walk backwards from the end and - # set break at any closing block braces at the same level. - # But quit if we are not in a chain of blocks. - for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { - last if ( $levels_to_go[$i] < $lev ); # stop at a lower level - next if ( $levels_to_go[$i] > $lev ); # skip past higher level - - if ( $block_type_to_go[$i] ) { - if ( $tokens_to_go[$i] eq '}' ) { - set_forced_breakpoint($i); - $saw_good_break = 1; - } - } - - # quit if we see anything besides words, function, blanks - # at this level - elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } - } - } - - my $imin = 0; - my $imax = $max_index_to_go; - - # trim any blank tokens - if ( $max_index_to_go >= 0 ) { - if ( $types_to_go[$imin] eq 'b' ) { $imin++ } - if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - } - - # anything left to write? - if ( $imin <= $imax ) { - - # add a blank line before certain key types - if ( $last_line_leading_type !~ /^[#b]/ ) { - my $want_blank = 0; - my $leading_token = $tokens_to_go[$imin]; - my $leading_type = $types_to_go[$imin]; - - # blank lines before subs except declarations and one-liners - # MCONVERSION LOCATION - for sub tokenization change - if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { - $want_blank = ( $rOpts->{'blanks-before-subs'} ) - && ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) !~ /^[\;\}]$/ - ); - } - - # break before all package declarations - # MCONVERSION LOCATION - for tokenizaton change - elsif ($leading_token =~ /^(package\s)/ - && $leading_type eq 'i' ) - { - $want_blank = ( $rOpts->{'blanks-before-subs'} ); - } - - # break before certain key blocks except one-liners - if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { - $want_blank = ( $rOpts->{'blanks-before-subs'} ) - && ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) ne '}' - ); - } - - # Break before certain block types if we haven't had a - # break at this level for a while. This is the - # difficult decision.. - elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/ - && $leading_type eq 'k' ) - { - my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; - if ( !defined($lc) ) { $lc = 0 } - - $want_blank = - $rOpts->{'blanks-before-blocks'} - && $lc >= $rOpts->{'long-block-line-count'} - && $file_writer_object->get_consecutive_nonblank_lines() >= - $rOpts->{'long-block-line-count'} - && ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) ne '}' - ); - } - - if ($want_blank) { - - # future: send blank line down normal path to VerticalAligner - Perl::Tidy::VerticalAligner::flush(); - $file_writer_object->write_blank_code_line(); - } - } - - # update blank line variables and count number of consecutive - # non-blank, non-comment lines at this level - $last_last_line_leading_level = $last_line_leading_level; - $last_line_leading_level = $levels_to_go[$imin]; - if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } - $last_line_leading_type = $types_to_go[$imin]; - if ( $last_line_leading_level == $last_last_line_leading_level - && $last_line_leading_type ne 'b' - && $last_line_leading_type ne '#' - && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) - { - $nonblank_lines_at_depth[$last_line_leading_level]++; - } - else { - $nonblank_lines_at_depth[$last_line_leading_level] = 1; - } - - FORMATTER_DEBUG_FLAG_FLUSH && do { - my ( $package, $file, $line ) = caller; - print -"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; - }; - - # add a couple of extra terminal blank tokens - pad_array_to_go(); - - # set all forced breakpoints for good list formatting - my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; - - if ( - $max_index_to_go > 0 - && ( - $is_long_line - || $old_line_count_in_batch > 1 - || is_unbalanced_batch() - || ( - $comma_count_in_batch - && ( $rOpts_maximum_fields_per_table > 0 - || $rOpts_comma_arrow_breakpoints == 0 ) - ) - ) - ) - { - $saw_good_break ||= scan_list(); - } - - # let $ri_first and $ri_last be references to lists of - # first and last tokens of line fragments to output.. - my ( $ri_first, $ri_last ); - - # write a single line if.. - if ( - - # we aren't allowed to add any newlines - !$rOpts_add_newlines - - # or, we don't already have an interior breakpoint - # and we didn't see a good breakpoint - || ( - !$forced_breakpoint_count - && !$saw_good_break - - # and this line is 'short' - && !$is_long_line - ) - ) - { - @$ri_first = ($imin); - @$ri_last = ($imax); - } - - # otherwise use multiple lines - else { - - ( $ri_first, $ri_last, my $colon_count ) = - set_continuation_breaks($saw_good_break); - - break_all_chain_tokens( $ri_first, $ri_last ); - - break_equals( $ri_first, $ri_last ); - - # now we do a correction step to clean this up a bit - # (The only time we would not do this is for debugging) - if ( $rOpts->{'recombine'} ) { - ( $ri_first, $ri_last ) = - recombine_breakpoints( $ri_first, $ri_last ); - } - - insert_final_breaks( $ri_first, $ri_last ) if $colon_count; - } - - # do corrector step if -lp option is used - my $do_not_pad = 0; - if ($rOpts_line_up_parentheses) { - $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); - } - send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); - } - prepare_for_new_input_lines(); - - # output any new -cscw block comment - if ($cscw_block_comment) { - flush(); - $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); - } -} - -sub note_added_semicolon { - $last_added_semicolon_at = $input_line_number; - if ( $added_semicolon_count == 0 ) { - $first_added_semicolon_at = $last_added_semicolon_at; - } - $added_semicolon_count++; - write_logfile_entry("Added ';' here\n"); -} - -sub note_deleted_semicolon { - $last_deleted_semicolon_at = $input_line_number; - if ( $deleted_semicolon_count == 0 ) { - $first_deleted_semicolon_at = $last_deleted_semicolon_at; - } - $deleted_semicolon_count++; - write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) -} - -sub note_embedded_tab { - $embedded_tab_count++; - $last_embedded_tab_at = $input_line_number; - if ( !$first_embedded_tab_at ) { - $first_embedded_tab_at = $last_embedded_tab_at; - } - - if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { - write_logfile_entry("Embedded tabs in quote or pattern\n"); - } -} - -sub starting_one_line_block { - - # after seeing an opening curly brace, look for the closing brace - # and see if the entire block will fit on a line. This routine is - # not always right because it uses the old whitespace, so a check - # is made later (at the closing brace) to make sure we really - # have a one-line block. We have to do this preliminary check, - # though, because otherwise we would always break at a semicolon - # within a one-line block if the block contains multiple statements. - - my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, - $rblock_type ) - = @_; - - # kill any current block - we can only go 1 deep - destroy_one_line_block(); - - # return value: - # 1=distance from start of block to opening brace exceeds line length - # 0=otherwise - - my $i_start = 0; - - # shouldn't happen: there must have been a prior call to - # store_token_to_go to put the opening brace in the output stream - if ( $max_index_to_go < 0 ) { - warning("program bug: store_token_to_go called incorrectly\n"); - report_definite_bug(); - } - else { - - # cannot use one-line blocks with cuddled else else/elsif lines - if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { - return 0; - } - } - - my $block_type = $$rblock_type[$j]; - - # find the starting keyword for this block (such as 'if', 'else', ...) - - if ( $block_type =~ /^[\{\}\;\:]$/ ) { - $i_start = $max_index_to_go; - } - - elsif ( $last_last_nonblank_token_to_go eq ')' ) { - - # For something like "if (xxx) {", the keyword "if" will be - # just after the most recent break. This will be 0 unless - # we have just killed a one-line block and are starting another. - # (doif.t) - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { - $i_start++; - } - - unless ( $tokens_to_go[$i_start] eq $block_type ) { - return 0; - } - } - - # the previous nonblank token should start these block types - elsif ( - ( $last_last_nonblank_token_to_go eq $block_type ) - || ( $block_type =~ /^sub/ - && $last_last_nonblank_token_to_go =~ /^sub/ ) - ) - { - $i_start = $last_last_nonblank_index_to_go; - } - - # patch for SWITCH/CASE to retain one-line case/when blocks - elsif ( $block_type eq 'case' || $block_type eq 'when' ) { - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { - $i_start++; - } - unless ( $tokens_to_go[$i_start] eq $block_type ) { - return 0; - } - } - - else { - return 1; - } - - my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; - - my $i; - - # see if length is too long to even start - if ( $pos > $rOpts_maximum_line_length ) { - return 1; - } - - for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { - - # old whitespace could be arbitrarily large, so don't use it - if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } - else { $pos += length( $$rtokens[$i] ) } - - # Return false result if we exceed the maximum line length, - if ( $pos > $rOpts_maximum_line_length ) { - return 0; - } - - # or encounter another opening brace before finding the closing brace. - elsif ($$rtokens[$i] eq '{' - && $$rtoken_type[$i] eq '{' - && $$rblock_type[$i] ) - { - return 0; - } - - # if we find our closing brace.. - elsif ($$rtokens[$i] eq '}' - && $$rtoken_type[$i] eq '}' - && $$rblock_type[$i] ) - { - - # be sure any trailing comment also fits on the line - my $i_nonblank = - ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; - - if ( $$rtoken_type[$i_nonblank] eq '#' ) { - $pos += length( $$rtokens[$i_nonblank] ); - - if ( $i_nonblank > $i + 1 ) { - $pos += length( $$rtokens[ $i + 1 ] ); - } - - if ( $pos > $rOpts_maximum_line_length ) { - return 0; - } - } - - # ok, it's a one-line block - create_one_line_block( $i_start, 20 ); - return 0; - } - - # just keep going for other characters - else { - } - } - - # Allow certain types of new one-line blocks to form by joining - # input lines. These can be safely done, but for other block types, - # we keep old one-line blocks but do not form new ones. It is not - # always a good idea to make as many one-line blocks as possible, - # so other types are not done. The user can always use -mangle. - if ( $is_sort_map_grep_eval{$block_type} ) { - create_one_line_block( $i_start, 1 ); - } - - return 0; -} - -sub unstore_token_to_go { - - # remove most recent token from output stream - if ( $max_index_to_go > 0 ) { - $max_index_to_go--; - } - else { - $max_index_to_go = UNDEFINED_INDEX; - } - -} - -sub want_blank_line { - flush(); - $file_writer_object->want_blank_line(); -} - -sub write_unindented_line { - flush(); - $file_writer_object->write_line( $_[0] ); -} - -sub undo_lp_ci { - - # If there is a single, long parameter within parens, like this: - # - # $self->command( "/msg " - # . $infoline->chan - # . " You said $1, but did you know that it's square was " - # . $1 * $1 . " ?" ); - # - # we can remove the continuation indentation of the 2nd and higher lines - # to achieve this effect, which is more pleasing: - # - # $self->command("/msg " - # . $infoline->chan - # . " You said $1, but did you know that it's square was " - # . $1 * $1 . " ?"); - - my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; - my $max_line = @$ri_first - 1; - - # must be multiple lines - return unless $max_line > $line_open; - - my $lev_start = $levels_to_go[$i_start]; - my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; - - # see if all additional lines in this container have continuation - # indentation - my $n; - my $line_1 = 1 + $line_open; - for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { - my $ibeg = $$ri_first[$n]; - my $iend = $$ri_last[$n]; - if ( $ibeg eq $closing_index ) { $n--; last } - return if ( $lev_start != $levels_to_go[$ibeg] ); - return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); - last if ( $closing_index <= $iend ); - } - - # we can reduce the indentation of all continuation lines - my $continuation_line_count = $n - $line_open; - @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = - (0) x ($continuation_line_count); - @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = - @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; -} - -sub set_logical_padding { - - # Look at a batch of lines and see if extra padding can improve the - # alignment when there are certain leading operators. Here is an - # example, in which some extra space is introduced before - # '( $year' to make it line up with the subsequent lines: - # - # if ( ( $Year < 1601 ) - # || ( $Year > 2899 ) - # || ( $EndYear < 1601 ) - # || ( $EndYear > 2899 ) ) - # { - # &Error_OutOfRange; - # } - # - my ( $ri_first, $ri_last ) = @_; - my $max_line = @$ri_first - 1; - - my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces, - $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); - - # looking at each line of this batch.. - foreach $line ( 0 .. $max_line - 1 ) { - - # see if the next line begins with a logical operator - $ibeg = $$ri_first[$line]; - $iend = $$ri_last[$line]; - $ibeg_next = $$ri_first[ $line + 1 ]; - $tok_next = $tokens_to_go[$ibeg_next]; - $type_next = $types_to_go[$ibeg_next]; - - $has_leading_op_next = ( $tok_next =~ /^\w/ ) - ? $is_chain_operator{$tok_next} # + - * / : ? && || - : $is_chain_operator{$type_next}; # and, or - - next unless ($has_leading_op_next); - - # next line must not be at lesser depth - next - if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] ); - - # identify the token in this line to be padded on the left - $ipad = undef; - - # handle lines at same depth... - if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) { - - # if this is not first line of the batch ... - if ( $line > 0 ) { - - # and we have leading operator.. - next if $has_leading_op; - - # Introduce padding if.. - # 1. the previous line is at lesser depth, or - # 2. the previous line ends in an assignment - # 3. the previous line ends in a 'return' - # 4. the previous line ends in a comma - # Example 1: previous line at lesser depth - # if ( ( $Year < 1601 ) # <- we are here but - # || ( $Year > 2899 ) # list has not yet - # || ( $EndYear < 1601 ) # collapsed vertically - # || ( $EndYear > 2899 ) ) - # { - # - # Example 2: previous line ending in assignment: - # $leapyear = - # $year % 4 ? 0 # <- We are here - # : $year % 100 ? 1 - # : $year % 400 ? 0 - # : 1; - # - # Example 3: previous line ending in comma: - # push @expr, - # /test/ ? undef - # : eval($_) ? 1 - # : eval($_) ? 1 - # : 0; - - # be sure levels agree (do not indent after an indented 'if') - next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); - - # allow padding on first line after a comma but only if: - # (1) this is line 2 and - # (2) there are at more than three lines and - # (3) lines 3 and 4 have the same leading operator - # These rules try to prevent padding within a long - # comma-separated list. - my $ok_comma; - if ( $types_to_go[$iendm] eq ',' - && $line == 1 - && $max_line > 2 ) - { - my $ibeg_next_next = $$ri_first[ $line + 2 ]; - my $tok_next_next = $tokens_to_go[$ibeg_next_next]; - $ok_comma = $tok_next_next eq $tok_next; - } - - next - unless ( - $is_assignment{ $types_to_go[$iendm] } - || $ok_comma - || ( $nesting_depth_to_go[$ibegm] < - $nesting_depth_to_go[$ibeg] ) - || ( $types_to_go[$iendm] eq 'k' - && $tokens_to_go[$iendm] eq 'return' ) - ); - - # we will add padding before the first token - $ipad = $ibeg; - } - - # for first line of the batch.. - else { - - # WARNING: Never indent if first line is starting in a - # continued quote, which would change the quote. - next if $starting_in_quote; - - # if this is text after closing '}' - # then look for an interior token to pad - if ( $types_to_go[$ibeg] eq '}' ) { - - } - - # otherwise, we might pad if it looks really good - else { - - # we might pad token $ibeg, so be sure that it - # is at the same depth as the next line. - next - if ( $nesting_depth_to_go[$ibeg] != - $nesting_depth_to_go[$ibeg_next] ); - - # We can pad on line 1 of a statement if at least 3 - # lines will be aligned. Otherwise, it - # can look very confusing. - - # We have to be careful not to pad if there are too few - # lines. The current rule is: - # (1) in general we require at least 3 consecutive lines - # with the same leading chain operator token, - # (2) but an exception is that we only require two lines - # with leading colons if there are no more lines. For example, - # the first $i in the following snippet would get padding - # by the second rule: - # - # $i == 1 ? ( "First", "Color" ) - # : $i == 2 ? ( "Then", "Rarity" ) - # : ( "Then", "Name" ); - - if ( $max_line > 1 ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $tokens_differ; - - # never indent line 1 of a '.' series because - # previous line is most likely at same level. - # TODO: we should also look at the leasing_spaces - # of the last output line and skip if it is same - # as this line. - next if ( $leading_token eq '.' ); - - my $count = 1; - foreach my $l ( 2 .. 3 ) { - last if ( $line + $l > $max_line ); - my $ibeg_next_next = $$ri_first[ $line + $l ]; - if ( $tokens_to_go[$ibeg_next_next] ne - $leading_token ) - { - $tokens_differ = 1; - last; - } - $count++; - } - next if ($tokens_differ); - next if ( $count < 3 && $leading_token ne ':' ); - $ipad = $ibeg; - } - else { - next; - } - } - } - } - - # find interior token to pad if necessary - if ( !defined($ipad) ) { - - for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { - - # find any unclosed container - next - unless ( $type_sequence_to_go[$i] - && $mate_index_to_go[$i] > $iend ); - - # find next nonblank token to pad - $ipad = $i + 1; - if ( $types_to_go[$ipad] eq 'b' ) { - $ipad++; - last if ( $ipad > $iend ); - } - } - last unless $ipad; - } - - # next line must not be at greater depth - my $iend_next = $$ri_last[ $line + 1 ]; - next - if ( $nesting_depth_to_go[ $iend_next + 1 ] > - $nesting_depth_to_go[$ipad] ); - - # lines must be somewhat similar to be padded.. - my $inext_next = $ibeg_next + 1; - if ( $types_to_go[$inext_next] eq 'b' ) { - $inext_next++; - } - my $type = $types_to_go[$ipad]; - my $type_next = $types_to_go[ $ipad + 1 ]; - - # see if there are multiple continuation lines - my $logical_continuation_lines = 1; - if ( $line + 2 <= $max_line ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $ibeg_next_next = $$ri_first[ $line + 2 ]; - if ( $tokens_to_go[$ibeg_next_next] eq $leading_token - && $nesting_depth_to_go[$ibeg_next] eq - $nesting_depth_to_go[$ibeg_next_next] ) - { - $logical_continuation_lines++; - } - } - - # see if leading types match - my $types_match = $types_to_go[$inext_next] eq $type; - my $matches_without_bang; - - # if first line has leading ! then compare the following token - if ( !$types_match && $type eq '!' ) { - $types_match = $matches_without_bang = - $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; - } - - if ( - - # either we have multiple continuation lines to follow - # and we are not padding the first token - ( $logical_continuation_lines > 1 && $ipad > 0 ) - - # or.. - || ( - - # types must match - $types_match - - # and keywords must match if keyword - && !( - $type eq 'k' - && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] - ) - ) - ) - { - - #----------------------begin special checks-------------- - # - # SPECIAL CHECK 1: - # A check is needed before we can make the pad. - # If we are in a list with some long items, we want each - # item to stand out. So in the following example, the - # first line begining with '$casefold->' would look good - # padded to align with the next line, but then it - # would be indented more than the last line, so we - # won't do it. - # - # ok( - # $casefold->{code} eq '0041' - # && $casefold->{status} eq 'C' - # && $casefold->{mapping} eq '0061', - # 'casefold 0x41' - # ); - # - # Note: - # It would be faster, and almost as good, to use a comma - # count, and not pad if comma_count > 1 and the previous - # line did not end with a comma. - # - my $ok_to_pad = 1; - - my $ibg = $$ri_first[ $line + 1 ]; - my $depth = $nesting_depth_to_go[ $ibg + 1 ]; - - # just use simplified formula for leading spaces to avoid - # needless sub calls - my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; - - # look at each line beyond the next .. - my $l = $line + 1; - foreach $l ( $line + 2 .. $max_line ) { - my $ibg = $$ri_first[$l]; - - # quit looking at the end of this container - last - if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) - || ( $nesting_depth_to_go[$ibg] < $depth ); - - # cannot do the pad if a later line would be - # outdented more - if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { - $ok_to_pad = 0; - last; - } - } - - # don't pad if we end in a broken list - if ( $l == $max_line ) { - my $i2 = $$ri_last[$l]; - if ( $types_to_go[$i2] eq '#' ) { - my $i1 = $$ri_first[$l]; - next - if ( - terminal_type( \@types_to_go, \@block_type_to_go, $i1, - $i2 ) eq ',' - ); - } - } - - # SPECIAL CHECK 2: - # a minus may introduce a quoted variable, and we will - # add the pad only if this line begins with a bare word, - # such as for the word 'Button' here: - # [ - # Button => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - # -accelerator => "Meta+$_" - # ]; - # - # On the other hand, if 'Button' is quoted, it looks best - # not to pad: - # [ - # 'Button' => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - # -accelerator => "Meta+$_" - # ]; - if ( $types_to_go[$ibeg_next] eq 'm' ) { - $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; - } - - next unless $ok_to_pad; - - #----------------------end special check--------------- - - my $length_1 = total_line_length( $ibeg, $ipad - 1 ); - my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); - $pad_spaces = $length_2 - $length_1; - - # If the first line has a leading ! and the second does - # not, then remove one space to try to align the next - # leading characters, which are often the same. For example: - # if ( !$ts - # || $ts == $self->Holder - # || $self->Holder->Type eq "Arena" ) - # - # This usually helps readability, but if there are subsequent - # ! operators things will still get messed up. For example: - # - # if ( !exists $Net::DNS::typesbyname{$qtype} - # && exists $Net::DNS::classesbyname{$qtype} - # && !exists $Net::DNS::classesbyname{$qclass} - # && exists $Net::DNS::typesbyname{$qclass} ) - # We can't fix that. - if ($matches_without_bang) { $pad_spaces-- } - - # make sure this won't change if -lp is used - my $indentation_1 = $leading_spaces_to_go[$ibeg]; - if ( ref($indentation_1) ) { - if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { - my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; - unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) { - $pad_spaces = 0; - } - } - } - - # we might be able to handle a pad of -1 by removing a blank - # token - if ( $pad_spaces < 0 ) { - - if ( $pad_spaces == -1 ) { - if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) { - $tokens_to_go[ $ipad - 1 ] = ''; - } - } - $pad_spaces = 0; - } - - # now apply any padding for alignment - if ( $ipad >= 0 && $pad_spaces ) { - - my $length_t = total_line_length( $ibeg, $iend ); - if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) { - $tokens_to_go[$ipad] = - ' ' x $pad_spaces . $tokens_to_go[$ipad]; - } - } - } - } - continue { - $iendm = $iend; - $ibegm = $ibeg; - $has_leading_op = $has_leading_op_next; - } # end of loop over lines - return; -} - -sub correct_lp_indentation { - - # When the -lp option is used, we need to make a last pass through - # each line to correct the indentation positions in case they differ - # from the predictions. This is necessary because perltidy uses a - # predictor/corrector method for aligning with opening parens. The - # predictor is usually good, but sometimes stumbles. The corrector - # tries to patch things up once the actual opening paren locations - # are known. - my ( $ri_first, $ri_last ) = @_; - my $do_not_pad = 0; - - # Note on flag '$do_not_pad': - # We want to avoid a situation like this, where the aligner inserts - # whitespace before the '=' to align it with a previous '=', because - # otherwise the parens might become mis-aligned in a situation like - # this, where the '=' has become aligned with the previous line, - # pushing the opening '(' forward beyond where we want it. - # - # $mkFloor::currentRoom = ''; - # $mkFloor::c_entry = $c->Entry( - # -width => '10', - # -relief => 'sunken', - # ... - # ); - # - # We leave it to the aligner to decide how to do this. - - # first remove continuation indentation if appropriate - my $max_line = @$ri_first - 1; - - # looking at each line of this batch.. - my ( $ibeg, $iend ); - my $line; - foreach $line ( 0 .. $max_line ) { - $ibeg = $$ri_first[$line]; - $iend = $$ri_last[$line]; - - # looking at each token in this output line.. - my $i; - foreach $i ( $ibeg .. $iend ) { - - # How many space characters to place before this token - # for special alignment. Actual padding is done in the - # continue block. - - # looking for next unvisited indentation item - my $indentation = $leading_spaces_to_go[$i]; - if ( !$indentation->get_MARKED() ) { - $indentation->set_MARKED(1); - - # looking for indentation item for which we are aligning - # with parens, braces, and brackets - next unless ( $indentation->get_ALIGN_PAREN() ); - - # skip closed container on this line - if ( $i > $ibeg ) { - my $im = $i - 1; - if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- } - if ( $type_sequence_to_go[$im] - && $mate_index_to_go[$im] <= $iend ) - { - next; - } - } - - if ( $line == 1 && $i == $ibeg ) { - $do_not_pad = 1; - } - - # Ok, let's see what the error is and try to fix it - my $actual_pos; - my $predicted_pos = $indentation->get_SPACES(); - if ( $i > $ibeg ) { - - # token is mid-line - use length to previous token - $actual_pos = total_line_length( $ibeg, $i - 1 ); - - # for mid-line token, we must check to see if all - # additional lines have continuation indentation, - # and remove it if so. Otherwise, we do not get - # good alignment. - my $closing_index = $indentation->get_CLOSED(); - if ( $closing_index > $iend ) { - my $ibeg_next = $$ri_first[ $line + 1 ]; - if ( $ci_levels_to_go[$ibeg_next] > 0 ) { - undo_lp_ci( $line, $i, $closing_index, $ri_first, - $ri_last ); - } - } - } - elsif ( $line > 0 ) { - - # handle case where token starts a new line; - # use length of previous line - my $ibegm = $$ri_first[ $line - 1 ]; - my $iendm = $$ri_last[ $line - 1 ]; - $actual_pos = total_line_length( $ibegm, $iendm ); - - # follow -pt style - ++$actual_pos - if ( $types_to_go[ $iendm + 1 ] eq 'b' ); - } - else { - - # token is first character of first line of batch - $actual_pos = $predicted_pos; - } - - my $move_right = $actual_pos - $predicted_pos; - - # done if no error to correct (gnu2.t) - if ( $move_right == 0 ) { - $indentation->set_RECOVERABLE_SPACES($move_right); - next; - } - - # if we have not seen closure for this indentation in - # this batch, we can only pass on a request to the - # vertical aligner - my $closing_index = $indentation->get_CLOSED(); - - if ( $closing_index < 0 ) { - $indentation->set_RECOVERABLE_SPACES($move_right); - next; - } - - # If necessary, look ahead to see if there is really any - # leading whitespace dependent on this whitespace, and - # also find the longest line using this whitespace. - # Since it is always safe to move left if there are no - # dependents, we only need to do this if we may have - # dependent nodes or need to move right. - - my $right_margin = 0; - my $have_child = $indentation->get_HAVE_CHILD(); - - my %saw_indentation; - my $line_count = 1; - $saw_indentation{$indentation} = $indentation; - - if ( $have_child || $move_right > 0 ) { - $have_child = 0; - my $max_length = 0; - if ( $i == $ibeg ) { - $max_length = total_line_length( $ibeg, $iend ); - } - - # look ahead at the rest of the lines of this batch.. - my $line_t; - foreach $line_t ( $line + 1 .. $max_line ) { - my $ibeg_t = $$ri_first[$line_t]; - my $iend_t = $$ri_last[$line_t]; - last if ( $closing_index <= $ibeg_t ); - - # remember all different indentation objects - my $indentation_t = $leading_spaces_to_go[$ibeg_t]; - $saw_indentation{$indentation_t} = $indentation_t; - $line_count++; - - # remember longest line in the group - my $length_t = total_line_length( $ibeg_t, $iend_t ); - if ( $length_t > $max_length ) { - $max_length = $length_t; - } - } - $right_margin = $rOpts_maximum_line_length - $max_length; - if ( $right_margin < 0 ) { $right_margin = 0 } - } - - my $first_line_comma_count = - grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; - my $comma_count = $indentation->get_COMMA_COUNT(); - my $arrow_count = $indentation->get_ARROW_COUNT(); - - # This is a simple approximate test for vertical alignment: - # if we broke just after an opening paren, brace, bracket, - # and there are 2 or more commas in the first line, - # and there are no '=>'s, - # then we are probably vertically aligned. We could set - # an exact flag in sub scan_list, but this is good - # enough. - my $indentation_count = keys %saw_indentation; - my $is_vertically_aligned = - ( $i == $ibeg - && $first_line_comma_count > 1 - && $indentation_count == 1 - && ( $arrow_count == 0 || $arrow_count == $line_count ) ); - - # Make the move if possible .. - if ( - - # we can always move left - $move_right < 0 - - # but we should only move right if we are sure it will - # not spoil vertical alignment - || ( $comma_count == 0 ) - || ( $comma_count > 0 && !$is_vertically_aligned ) - ) - { - my $move = - ( $move_right <= $right_margin ) - ? $move_right - : $right_margin; - - foreach ( keys %saw_indentation ) { - $saw_indentation{$_} - ->permanently_decrease_AVAILABLE_SPACES( -$move ); - } - } - - # Otherwise, record what we want and the vertical aligner - # will try to recover it. - else { - $indentation->set_RECOVERABLE_SPACES($move_right); - } - } - } - } - return $do_not_pad; -} - -# flush is called to output any tokens in the pipeline, so that -# an alternate source of lines can be written in the correct order - -sub flush { - destroy_one_line_block(); - output_line_to_go(); - Perl::Tidy::VerticalAligner::flush(); -} - -sub reset_block_text_accumulator { - - # save text after 'if' and 'elsif' to append after 'else' - if ($accumulating_text_for_block) { - - if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { - push @{$rleading_block_if_elsif_text}, $leading_block_text; - } - } - $accumulating_text_for_block = ""; - $leading_block_text = ""; - $leading_block_text_level = 0; - $leading_block_text_length_exceeded = 0; - $leading_block_text_line_number = 0; - $leading_block_text_line_length = 0; -} - -sub set_block_text_accumulator { - my $i = shift; - $accumulating_text_for_block = $tokens_to_go[$i]; - if ( $accumulating_text_for_block !~ /^els/ ) { - $rleading_block_if_elsif_text = []; - } - $leading_block_text = ""; - $leading_block_text_level = $levels_to_go[$i]; - $leading_block_text_line_number = - $vertical_aligner_object->get_output_line_number(); - $leading_block_text_length_exceeded = 0; - - # this will contain the column number of the last character - # of the closing side comment - $leading_block_text_line_length = - length($accumulating_text_for_block) + - length( $rOpts->{'closing-side-comment-prefix'} ) + - $leading_block_text_level * $rOpts_indent_columns + 3; -} - -sub accumulate_block_text { - my $i = shift; - - # accumulate leading text for -csc, ignoring any side comments - if ( $accumulating_text_for_block - && !$leading_block_text_length_exceeded - && $types_to_go[$i] ne '#' ) - { - - my $added_length = length( $tokens_to_go[$i] ); - $added_length += 1 if $i == 0; - my $new_line_length = $leading_block_text_line_length + $added_length; - - # we can add this text if we don't exceed some limits.. - if ( - - # we must not have already exceeded the text length limit - length($leading_block_text) < - $rOpts_closing_side_comment_maximum_text - - # and either: - # the new total line length must be below the line length limit - # or the new length must be below the text length limit - # (ie, we may allow one token to exceed the text length limit) - && ( $new_line_length < $rOpts_maximum_line_length - || length($leading_block_text) + $added_length < - $rOpts_closing_side_comment_maximum_text ) - - # UNLESS: we are adding a closing paren before the brace we seek. - # This is an attempt to avoid situations where the ... to be - # added are longer than the omitted right paren, as in: - - # foreach my $item (@a_rather_long_variable_name_here) { - # &whatever; - # } ## end foreach my $item (@a_rather_long_variable_name_here... - - || ( - $tokens_to_go[$i] eq ')' - && ( - ( - $i + 1 <= $max_index_to_go - && $block_type_to_go[ $i + 1 ] eq - $accumulating_text_for_block - ) - || ( $i + 2 <= $max_index_to_go - && $block_type_to_go[ $i + 2 ] eq - $accumulating_text_for_block ) - ) - ) - ) - { - - # add an extra space at each newline - if ( $i == 0 ) { $leading_block_text .= ' ' } - - # add the token text - $leading_block_text .= $tokens_to_go[$i]; - $leading_block_text_line_length = $new_line_length; - } - - # show that text was truncated if necessary - elsif ( $types_to_go[$i] ne 'b' ) { - $leading_block_text_length_exceeded = 1; - $leading_block_text .= '...'; - } - } -} - -{ - my %is_if_elsif_else_unless_while_until_for_foreach; - - BEGIN { - - # These block types may have text between the keyword and opening - # curly. Note: 'else' does not, but must be included to allow trailing - # if/elsif text to be appended. - # patch for SWITCH/CASE: added 'case' and 'when' - @_ = qw(if elsif else unless while until for foreach case when); - @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); - } - - sub accumulate_csc_text { - - # called once per output buffer when -csc is used. Accumulates - # the text placed after certain closing block braces. - # Defines and returns the following for this buffer: - - my $block_leading_text = ""; # the leading text of the last '}' - my $rblock_leading_if_elsif_text; - my $i_block_leading_text = - -1; # index of token owning block_leading_text - my $block_line_count = 100; # how many lines the block spans - my $terminal_type = 'b'; # type of last nonblank token - my $i_terminal = 0; # index of last nonblank token - my $terminal_block_type = ""; - - for my $i ( 0 .. $max_index_to_go ) { - my $type = $types_to_go[$i]; - my $block_type = $block_type_to_go[$i]; - my $token = $tokens_to_go[$i]; - - # remember last nonblank token type - if ( $type ne '#' && $type ne 'b' ) { - $terminal_type = $type; - $terminal_block_type = $block_type; - $i_terminal = $i; - } - - my $type_sequence = $type_sequence_to_go[$i]; - if ( $block_type && $type_sequence ) { - - if ( $token eq '}' ) { - - # restore any leading text saved when we entered this block - if ( defined( $block_leading_text{$type_sequence} ) ) { - ( $block_leading_text, $rblock_leading_if_elsif_text ) = - @{ $block_leading_text{$type_sequence} }; - $i_block_leading_text = $i; - delete $block_leading_text{$type_sequence}; - $rleading_block_if_elsif_text = - $rblock_leading_if_elsif_text; - } - - # if we run into a '}' then we probably started accumulating - # at something like a trailing 'if' clause..no harm done. - if ( $accumulating_text_for_block - && $levels_to_go[$i] <= $leading_block_text_level ) - { - my $lev = $levels_to_go[$i]; - reset_block_text_accumulator(); - } - - if ( defined( $block_opening_line_number{$type_sequence} ) ) - { - my $output_line_number = - $vertical_aligner_object->get_output_line_number(); - $block_line_count = - $output_line_number - - $block_opening_line_number{$type_sequence} + 1; - delete $block_opening_line_number{$type_sequence}; - } - else { - - # Error: block opening line undefined for this line.. - # This shouldn't be possible, but it is not a - # significant problem. - } - } - - elsif ( $token eq '{' ) { - - my $line_number = - $vertical_aligner_object->get_output_line_number(); - $block_opening_line_number{$type_sequence} = $line_number; - - if ( $accumulating_text_for_block - && $levels_to_go[$i] == $leading_block_text_level ) - { - - if ( $accumulating_text_for_block eq $block_type ) { - - # save any leading text before we enter this block - $block_leading_text{$type_sequence} = [ - $leading_block_text, - $rleading_block_if_elsif_text - ]; - $block_opening_line_number{$type_sequence} = - $leading_block_text_line_number; - reset_block_text_accumulator(); - } - else { - - # shouldn't happen, but not a serious error. - # We were accumulating -csc text for block type - # $accumulating_text_for_block and unexpectedly - # encountered a '{' for block type $block_type. - } - } - } - } - - if ( $type eq 'k' - && $csc_new_statement_ok - && $is_if_elsif_else_unless_while_until_for_foreach{$token} - && $token =~ /$closing_side_comment_list_pattern/o ) - { - set_block_text_accumulator($i); - } - else { - - # note: ignoring type 'q' because of tricks being played - # with 'q' for hanging side comments - if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { - $csc_new_statement_ok = - ( $block_type || $type eq 'J' || $type eq ';' ); - } - if ( $type eq ';' - && $accumulating_text_for_block - && $levels_to_go[$i] == $leading_block_text_level ) - { - reset_block_text_accumulator(); - } - else { - accumulate_block_text($i); - } - } - } - - # Treat an 'else' block specially by adding preceding 'if' and - # 'elsif' text. Otherwise, the 'end else' is not helpful, - # especially for cuddled-else formatting. - if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { - $block_leading_text = - make_else_csc_text( $i_terminal, $terminal_block_type, - $block_leading_text, $rblock_leading_if_elsif_text ); - } - - return ( $terminal_type, $i_terminal, $i_block_leading_text, - $block_leading_text, $block_line_count ); - } -} - -sub make_else_csc_text { - - # create additional -csc text for an 'else' and optionally 'elsif', - # depending on the value of switch - # $rOpts_closing_side_comment_else_flag: - # - # = 0 add 'if' text to trailing else - # = 1 same as 0 plus: - # add 'if' to 'elsif's if can fit in line length - # add last 'elsif' to trailing else if can fit in one line - # = 2 same as 1 but do not check if exceed line length - # - # $rif_elsif_text = a reference to a list of all previous closing - # side comments created for this if block - # - my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; - my $csc_text = $block_leading_text; - - if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 ) - { - return $csc_text; - } - - my $count = @{$rif_elsif_text}; - return $csc_text unless ($count); - - my $if_text = '[ if' . $rif_elsif_text->[0]; - - # always show the leading 'if' text on 'else' - if ( $block_type eq 'else' ) { - $csc_text .= $if_text; - } - - # see if that's all - if ( $rOpts_closing_side_comment_else_flag == 0 ) { - return $csc_text; - } - - my $last_elsif_text = ""; - if ( $count > 1 ) { - $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; - if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } - } - - # tentatively append one more item - my $saved_text = $csc_text; - if ( $block_type eq 'else' ) { - $csc_text .= $last_elsif_text; - } - else { - $csc_text .= ' ' . $if_text; - } - - # all done if no length checks requested - if ( $rOpts_closing_side_comment_else_flag == 2 ) { - return $csc_text; - } - - # undo it if line length exceeded - my $length = - length($csc_text) + - length($block_type) + - length( $rOpts->{'closing-side-comment-prefix'} ) + - $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; - if ( $length > $rOpts_maximum_line_length ) { - $csc_text = $saved_text; - } - return $csc_text; -} - -sub add_closing_side_comment { - - # add closing side comments after closing block braces if -csc used - my $cscw_block_comment; - - #--------------------------------------------------------------- - # Step 1: loop through all tokens of this line to accumulate - # the text needed to create the closing side comments. Also see - # how the line ends. - #--------------------------------------------------------------- - - my ( $terminal_type, $i_terminal, $i_block_leading_text, - $block_leading_text, $block_line_count ) - = accumulate_csc_text(); - - #--------------------------------------------------------------- - # Step 2: make the closing side comment if this ends a block - #--------------------------------------------------------------- - my $have_side_comment = $i_terminal != $max_index_to_go; - - # if this line might end in a block closure.. - if ( - $terminal_type eq '}' - - # ..and either - && ( - - # the block is long enough - ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) - - # or there is an existing comment to check - || ( $have_side_comment - && $rOpts->{'closing-side-comment-warnings'} ) - ) - - # .. and if this is one of the types of interest - && $block_type_to_go[$i_terminal] =~ - /$closing_side_comment_list_pattern/o - - # .. but not an anonymous sub - # These are not normally of interest, and their closing braces are - # often followed by commas or semicolons anyway. This also avoids - # possible erratic output due to line numbering inconsistencies - # in the cases where their closing braces terminate a line. - && $block_type_to_go[$i_terminal] ne 'sub' - - # ..and the corresponding opening brace must is not in this batch - # (because we do not need to tag one-line blocks, although this - # should also be caught with a positive -csci value) - && $mate_index_to_go[$i_terminal] < 0 - - # ..and either - && ( - - # this is the last token (line doesnt have a side comment) - !$have_side_comment - - # or the old side comment is a closing side comment - || $tokens_to_go[$max_index_to_go] =~ - /$closing_side_comment_prefix_pattern/o - ) - ) - { - - # then make the closing side comment text - my $token = -"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]"; - - # append any extra descriptive text collected above - if ( $i_block_leading_text == $i_terminal ) { - $token .= $block_leading_text; - } - $token =~ s/\s*$//; # trim any trailing whitespace - - # handle case of existing closing side comment - if ($have_side_comment) { - - # warn if requested and tokens differ significantly - if ( $rOpts->{'closing-side-comment-warnings'} ) { - my $old_csc = $tokens_to_go[$max_index_to_go]; - my $new_csc = $token; - $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' - my $new_trailing_dots = $1; - $old_csc =~ s/\.\.\.\s*$//; - $new_csc =~ s/\s+//g; # trim all whitespace - $old_csc =~ s/\s+//g; - - # Patch to handle multiple closing side comments at - # else and elsif's. These have become too complicated - # to check, so if we see an indication of - # '[ if' or '[ # elsif', then assume they were made - # by perltidy. - if ( $block_type_to_go[$i_terminal] eq 'else' ) { - if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } - } - elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { - if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } - } - - # if old comment is contained in new comment, - # only compare the common part. - if ( length($new_csc) > length($old_csc) ) { - $new_csc = substr( $new_csc, 0, length($old_csc) ); - } - - # if the new comment is shorter and has been limited, - # only compare the common part. - if ( length($new_csc) < length($old_csc) && $new_trailing_dots ) - { - $old_csc = substr( $old_csc, 0, length($new_csc) ); - } - - # any remaining difference? - if ( $new_csc ne $old_csc ) { - - # just leave the old comment if we are below the threshold - # for creating side comments - if ( $block_line_count < - $rOpts->{'closing-side-comment-interval'} ) - { - $token = undef; - } - - # otherwise we'll make a note of it - else { - - warning( -"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" - ); - - # save the old side comment in a new trailing block comment - my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; - $year += 1900; - $month += 1; - $cscw_block_comment = -"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; - } - } - else { - - # No differences.. we can safely delete old comment if we - # are below the threshold - if ( $block_line_count < - $rOpts->{'closing-side-comment-interval'} ) - { - $token = undef; - unstore_token_to_go() - if ( $types_to_go[$max_index_to_go] eq '#' ); - unstore_token_to_go() - if ( $types_to_go[$max_index_to_go] eq 'b' ); - } - } - } - - # switch to the new csc (unless we deleted it!) - $tokens_to_go[$max_index_to_go] = $token if $token; - } - - # handle case of NO existing closing side comment - else { - - # insert the new side comment into the output token stream - my $type = '#'; - my $block_type = ''; - my $type_sequence = ''; - my $container_environment = - $container_environment_to_go[$max_index_to_go]; - my $level = $levels_to_go[$max_index_to_go]; - my $slevel = $nesting_depth_to_go[$max_index_to_go]; - my $no_internal_newlines = 0; - - my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; - my $ci_level = $ci_levels_to_go[$max_index_to_go]; - my $in_continued_quote = 0; - - # first insert a blank token - insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); - - # then the side comment - insert_new_token_to_go( $token, $type, $slevel, - $no_internal_newlines ); - } - } - return $cscw_block_comment; -} - -sub previous_nonblank_token { - my ($i) = @_; - my $name = ""; - my $im = $i - 1; - return "" if ( $im < 0 ); - if ( $types_to_go[$im] eq 'b' ) { $im--; } - return "" if ( $im < 0 ); - $name = $tokens_to_go[$im]; - - # prepend any sub name to an isolated -> to avoid unwanted alignments - # [test case is test8/penco.pl] - if ( $name eq '->' ) { - $im--; - if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { - $name = $tokens_to_go[$im] . $name; - } - } - return $name; -} - -sub send_lines_to_vertical_aligner { - - my ( $ri_first, $ri_last, $do_not_pad ) = @_; - - my $rindentation_list = [0]; # ref to indentations for each line - - # define the array @matching_token_to_go for the output tokens - # which will be non-blank for each special token (such as =>) - # for which alignment is required. - set_vertical_alignment_markers( $ri_first, $ri_last ); - - # flush if necessary to avoid unwanted alignment - my $must_flush = 0; - if ( @$ri_first > 1 ) { - - # flush before a long if statement - if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { - $must_flush = 1; - } - } - if ($must_flush) { - Perl::Tidy::VerticalAligner::flush(); - } - - set_logical_padding( $ri_first, $ri_last ); - - # loop to prepare each line for shipment - my $n_last_line = @$ri_first - 1; - my $in_comma_list; - for my $n ( 0 .. $n_last_line ) { - my $ibeg = $$ri_first[$n]; - my $iend = $$ri_last[$n]; - - my ( $rtokens, $rfields, $rpatterns ) = - make_alignment_patterns( $ibeg, $iend ); - - my ( $indentation, $lev, $level_end, $terminal_type, - $is_semicolon_terminated, $is_outdented_line ) - = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, - $ri_first, $ri_last, $rindentation_list ); - - # we will allow outdenting of long lines.. - my $outdent_long_lines = ( - - # which are long quotes, if allowed - ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) - - # which are long block comments, if allowed - || ( - $types_to_go[$ibeg] eq '#' - && $rOpts->{'outdent-long-comments'} - - # but not if this is a static block comment - && !$is_static_block_comment - ) - ); - - my $level_jump = - $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; - - my $rvertical_tightness_flags = - set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, - $ri_first, $ri_last ); - - # flush an outdented line to avoid any unwanted vertical alignment - Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); - - my $is_terminal_ternary = 0; - if ( $tokens_to_go[$ibeg] eq ':' - || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) - { - if ( ( $terminal_type eq ';' && $level_end <= $lev ) - || ( $level_end < $lev ) ) - { - $is_terminal_ternary = 1; - } - } - - # send this new line down the pipe - my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; - Perl::Tidy::VerticalAligner::append_line( - $lev, - $level_end, - $indentation, - $rfields, - $rtokens, - $rpatterns, - $forced_breakpoint_to_go[$iend] || $in_comma_list, - $outdent_long_lines, - $is_terminal_ternary, - $is_semicolon_terminated, - $do_not_pad, - $rvertical_tightness_flags, - $level_jump, - ); - $in_comma_list = - $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; - - # flush an outdented line to avoid any unwanted vertical alignment - Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); - - $do_not_pad = 0; - - } # end of loop to output each line - - # remember indentation of lines containing opening containers for - # later use by sub set_adjusted_indentation - save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); -} - -{ # begin make_alignment_patterns - - my %block_type_map; - my %keyword_map; - - BEGIN { - - # map related block names into a common name to - # allow alignment - %block_type_map = ( - 'unless' => 'if', - 'else' => 'if', - 'elsif' => 'if', - 'when' => 'if', - 'default' => 'if', - 'case' => 'if', - 'sort' => 'map', - 'grep' => 'map', - ); - - # map certain keywords to the same 'if' class to align - # long if/elsif sequences. [elsif.pl] - %keyword_map = ( - 'unless' => 'if', - 'else' => 'if', - 'elsif' => 'if', - 'when' => 'given', - 'default' => 'given', - 'case' => 'switch', - - # treat an 'undef' similar to numbers and quotes - 'undef' => 'Q', - ); - } - - sub make_alignment_patterns { - - # Here we do some important preliminary work for the - # vertical aligner. We create three arrays for one - # output line. These arrays contain strings that can - # be tested by the vertical aligner to see if - # consecutive lines can be aligned vertically. - # - # The three arrays are indexed on the vertical - # alignment fields and are: - # @tokens - a list of any vertical alignment tokens for this line. - # These are tokens, such as '=' '&&' '#' etc which - # we want to might align vertically. These are - # decorated with various information such as - # nesting depth to prevent unwanted vertical - # alignment matches. - # @fields - the actual text of the line between the vertical alignment - # tokens. - # @patterns - a modified list of token types, one for each alignment - # field. These should normally each match before alignment is - # allowed, even when the alignment tokens match. - my ( $ibeg, $iend ) = @_; - my @tokens = (); - my @fields = (); - my @patterns = (); - my $i_start = $ibeg; - my $i; - - my $depth = 0; - my @container_name = (""); - my @multiple_comma_arrows = (undef); - - my $j = 0; # field index - - $patterns[0] = ""; - for $i ( $ibeg .. $iend ) { - - # Keep track of containers balanced on this line only. - # These are used below to prevent unwanted cross-line alignments. - # Unbalanced containers already avoid aligning across - # container boundaries. - if ( $tokens_to_go[$i] eq '(' ) { - - # if container is balanced on this line... - my $i_mate = $mate_index_to_go[$i]; - if ( $i_mate > $i && $i_mate <= $iend ) { - $depth++; - my $seqno = $type_sequence_to_go[$i]; - my $count = comma_arrow_count($seqno); - $multiple_comma_arrows[$depth] = $count && $count > 1; - - # Append the previous token name to make the container name - # more unique. This name will also be given to any commas - # within this container, and it helps avoid undesirable - # alignments of different types of containers. - my $name = previous_nonblank_token($i); - $name =~ s/^->//; - $container_name[$depth] = "+" . $name; - - # Make the container name even more unique if necessary. - # If we are not vertically aligning this opening paren, - # append a character count to avoid bad alignment because - # it usually looks bad to align commas within continers - # for which the opening parens do not align. Here - # is an example very BAD alignment of commas (because - # the atan2 functions are not all aligned): - # $XY = - # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + - # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - - # $X * atan2( $X, 1 ) - - # $Y * atan2( $Y, 1 ); - # - # On the other hand, it is usually okay to align commas if - # opening parens align, such as: - # glVertex3d( $cx + $s * $xs, $cy, $z ); - # glVertex3d( $cx, $cy + $s * $ys, $z ); - # glVertex3d( $cx - $s * $xs, $cy, $z ); - # glVertex3d( $cx, $cy - $s * $ys, $z ); - # - # To distinguish between these situations, we will - # append the length of the line from the previous matching - # token, or beginning of line, to the function name. This - # will allow the vertical aligner to reject undesirable - # matches. - - # if we are not aligning on this paren... - if ( $matching_token_to_go[$i] eq '' ) { - - # Sum length from previous alignment, or start of line. - # Note that we have to sum token lengths here because - # padding has been done and so array $lengths_to_go - # is now wrong. - my $len = - length( - join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); - $len += leading_spaces_to_go($i_start) - if ( $i_start == $ibeg ); - - # tack length onto the container name to make unique - $container_name[$depth] .= "-" . $len; - } - } - } - elsif ( $tokens_to_go[$i] eq ')' ) { - $depth-- if $depth > 0; - } - - # if we find a new synchronization token, we are done with - # a field - if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { - - my $tok = my $raw_tok = $matching_token_to_go[$i]; - - # make separators in different nesting depths unique - # by appending the nesting depth digit. - if ( $raw_tok ne '#' ) { - $tok .= "$nesting_depth_to_go[$i]"; - } - - # also decorate commas with any container name to avoid - # unwanted cross-line alignments. - if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { - if ( $container_name[$depth] ) { - $tok .= $container_name[$depth]; - } - } - - # Patch to avoid aligning leading and trailing if, unless. - # Mark trailing if, unless statements with container names. - # This makes them different from leading if, unless which - # are not so marked at present. If we ever need to name - # them too, we could use ci to distinguish them. - # Example problem to avoid: - # return ( 2, "DBERROR" ) - # if ( $retval == 2 ); - # if ( scalar @_ ) { - # my ( $a, $b, $c, $d, $e, $f ) = @_; - # } - if ( $raw_tok eq '(' ) { - my $ci = $ci_levels_to_go[$ibeg]; - if ( $container_name[$depth] =~ /^\+(if|unless)/ - && $ci ) - { - $tok .= $container_name[$depth]; - } - } - - # Decorate block braces with block types to avoid - # unwanted alignments such as the following: - # foreach ( @{$routput_array} ) { $fh->print($_) } - # eval { $fh->close() }; - if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { - my $block_type = $block_type_to_go[$i]; - - # map certain related block types to allow - # else blocks to align - $block_type = $block_type_map{$block_type} - if ( defined( $block_type_map{$block_type} ) ); - - # remove sub names to allow one-line sub braces to align - # regardless of name - if ( $block_type =~ /^sub / ) { $block_type = 'sub' } - - # allow all control-type blocks to align - if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } - - $tok .= $block_type; - } - - # concatenate the text of the consecutive tokens to form - # the field - push( @fields, - join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); - - # store the alignment token for this field - push( @tokens, $tok ); - - # get ready for the next batch - $i_start = $i; - $j++; - $patterns[$j] = ""; - } - - # continue accumulating tokens - # handle non-keywords.. - if ( $types_to_go[$i] ne 'k' ) { - my $type = $types_to_go[$i]; - - # Mark most things before arrows as a quote to - # get them to line up. Testfile: mixed.pl. - if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { - my $next_type = $types_to_go[ $i + 1 ]; - my $i_next_nonblank = - ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - - if ( $types_to_go[$i_next_nonblank] eq '=>' ) { - $type = 'Q'; - - # Patch to ignore leading minus before words, - # by changing pattern 'mQ' into just 'Q', - # so that we can align things like this: - # Button => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } - } - } - - # patch to make numbers and quotes align - if ( $type eq 'n' ) { $type = 'Q' } - - # patch to ignore any ! in patterns - if ( $type eq '!' ) { $type = '' } - - $patterns[$j] .= $type; - } - - # for keywords we have to use the actual text - else { - - my $tok = $tokens_to_go[$i]; - - # but map certain keywords to a common string to allow - # alignment. - $tok = $keyword_map{$tok} - if ( defined( $keyword_map{$tok} ) ); - $patterns[$j] .= $tok; - } - } - - # done with this line .. join text of tokens to make the last field - push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); - return ( \@tokens, \@fields, \@patterns ); - } - -} # end make_alignment_patterns - -{ # begin unmatched_indexes - - # closure to keep track of unbalanced containers. - # arrays shared by the routines in this block: - my @unmatched_opening_indexes_in_this_batch; - my @unmatched_closing_indexes_in_this_batch; - my %comma_arrow_count; - - sub is_unbalanced_batch { - @unmatched_opening_indexes_in_this_batch + - @unmatched_closing_indexes_in_this_batch; - } - - sub comma_arrow_count { - my $seqno = $_[0]; - return $comma_arrow_count{$seqno}; - } - - sub match_opening_and_closing_tokens { - - # Match up indexes of opening and closing braces, etc, in this batch. - # This has to be done after all tokens are stored because unstoring - # of tokens would otherwise cause trouble. - - @unmatched_opening_indexes_in_this_batch = (); - @unmatched_closing_indexes_in_this_batch = (); - %comma_arrow_count = (); - - my ( $i, $i_mate, $token ); - foreach $i ( 0 .. $max_index_to_go ) { - if ( $type_sequence_to_go[$i] ) { - $token = $tokens_to_go[$i]; - if ( $token =~ /^[\(\[\{\?]$/ ) { - push @unmatched_opening_indexes_in_this_batch, $i; - } - elsif ( $token =~ /^[\)\]\}\:]$/ ) { - - $i_mate = pop @unmatched_opening_indexes_in_this_batch; - if ( defined($i_mate) && $i_mate >= 0 ) { - if ( $type_sequence_to_go[$i_mate] == - $type_sequence_to_go[$i] ) - { - $mate_index_to_go[$i] = $i_mate; - $mate_index_to_go[$i_mate] = $i; - } - else { - push @unmatched_opening_indexes_in_this_batch, - $i_mate; - push @unmatched_closing_indexes_in_this_batch, $i; - } - } - else { - push @unmatched_closing_indexes_in_this_batch, $i; - } - } - } - elsif ( $tokens_to_go[$i] eq '=>' ) { - if (@unmatched_opening_indexes_in_this_batch) { - my $j = $unmatched_opening_indexes_in_this_batch[-1]; - my $seqno = $type_sequence_to_go[$j]; - $comma_arrow_count{$seqno}++; - } - } - } - } - - sub save_opening_indentation { - - # This should be called after each batch of tokens is output. It - # saves indentations of lines of all unmatched opening tokens. - # These will be used by sub get_opening_indentation. - - my ( $ri_first, $ri_last, $rindentation_list ) = @_; - - # we no longer need indentations of any saved indentations which - # are unmatched closing tokens in this batch, because we will - # never encounter them again. So we can delete them to keep - # the hash size down. - foreach (@unmatched_closing_indexes_in_this_batch) { - my $seqno = $type_sequence_to_go[$_]; - delete $saved_opening_indentation{$seqno}; - } - - # we need to save indentations of any unmatched opening tokens - # in this batch because we may need them in a subsequent batch. - foreach (@unmatched_opening_indexes_in_this_batch) { - my $seqno = $type_sequence_to_go[$_]; - $saved_opening_indentation{$seqno} = [ - lookup_opening_indentation( - $_, $ri_first, $ri_last, $rindentation_list - ) - ]; - } - } -} # end unmatched_indexes - -sub get_opening_indentation { - - # get the indentation of the line which output the opening token - # corresponding to a given closing token in the current output batch. - # - # given: - # $i_closing - index in this line of a closing token ')' '}' or ']' - # - # $ri_first - reference to list of the first index $i for each output - # line in this batch - # $ri_last - reference to list of the last index $i for each output line - # in this batch - # $rindentation_list - reference to a list containing the indentation - # used for each line. - # - # return: - # -the indentation of the line which contained the opening token - # which matches the token at index $i_opening - # -and its offset (number of columns) from the start of the line - # - my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; - - # first, see if the opening token is in the current batch - my $i_opening = $mate_index_to_go[$i_closing]; - my ( $indent, $offset, $is_leading, $exists ); - $exists = 1; - if ( $i_opening >= 0 ) { - - # it is..look up the indentation - ( $indent, $offset, $is_leading ) = - lookup_opening_indentation( $i_opening, $ri_first, $ri_last, - $rindentation_list ); - } - - # if not, it should have been stored in the hash by a previous batch - else { - my $seqno = $type_sequence_to_go[$i_closing]; - if ($seqno) { - if ( $saved_opening_indentation{$seqno} ) { - ( $indent, $offset, $is_leading ) = - @{ $saved_opening_indentation{$seqno} }; - } - - # some kind of serious error - # (example is badfile.t) - else { - $indent = 0; - $offset = 0; - $is_leading = 0; - $exists = 0; - } - } - - # if no sequence number it must be an unbalanced container - else { - $indent = 0; - $offset = 0; - $is_leading = 0; - $exists = 0; - } - } - return ( $indent, $offset, $is_leading, $exists ); -} - -sub lookup_opening_indentation { - - # get the indentation of the line in the current output batch - # which output a selected opening token - # - # given: - # $i_opening - index of an opening token in the current output batch - # whose line indentation we need - # $ri_first - reference to list of the first index $i for each output - # line in this batch - # $ri_last - reference to list of the last index $i for each output line - # in this batch - # $rindentation_list - reference to a list containing the indentation - # used for each line. (NOTE: the first slot in - # this list is the last returned line number, and this is - # followed by the list of indentations). - # - # return - # -the indentation of the line which contained token $i_opening - # -and its offset (number of columns) from the start of the line - - my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; - - my $nline = $rindentation_list->[0]; # line number of previous lookup - - # reset line location if necessary - $nline = 0 if ( $i_opening < $ri_start->[$nline] ); - - # find the correct line - unless ( $i_opening > $ri_last->[-1] ) { - while ( $i_opening > $ri_last->[$nline] ) { $nline++; } - } - - # error - token index is out of bounds - shouldn't happen - else { - warning( -"non-fatal program bug in lookup_opening_indentation - index out of range\n" - ); - report_definite_bug(); - $nline = $#{$ri_last}; - } - - $rindentation_list->[0] = - $nline; # save line number to start looking next call - my $ibeg = $ri_start->[$nline]; - my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; - my $is_leading = ( $ibeg == $i_opening ); - return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); -} - -{ - my %is_if_elsif_else_unless_while_until_for_foreach; - - BEGIN { - - # These block types may have text between the keyword and opening - # curly. Note: 'else' does not, but must be included to allow trailing - # if/elsif text to be appended. - # patch for SWITCH/CASE: added 'case' and 'when' - @_ = qw(if elsif else unless while until for foreach case when); - @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); - } - - sub set_adjusted_indentation { - - # This routine has the final say regarding the actual indentation of - # a line. It starts with the basic indentation which has been - # defined for the leading token, and then takes into account any - # options that the user has set regarding special indenting and - # outdenting. - - my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, - $rindentation_list ) - = @_; - - # we need to know the last token of this line - my ( $terminal_type, $i_terminal ) = - terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); - - my $is_outdented_line = 0; - - my $is_semicolon_terminated = $terminal_type eq ';' - && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; - - ########################################################## - # Section 1: set a flag and a default indentation - # - # Most lines are indented according to the initial token. - # But it is common to outdent to the level just after the - # terminal token in certain cases... - # adjust_indentation flag: - # 0 - do not adjust - # 1 - outdent - # 2 - vertically align with opening token - # 3 - indent - ########################################################## - my $adjust_indentation = 0; - my $default_adjust_indentation = $adjust_indentation; - - my ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ); - - # if we are at a closing token of some type.. - if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) { - - # get the indentation of the line containing the corresponding - # opening token - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - - # First set the default behavior: - # default behavior is to outdent closing lines - # of the form: "); }; ]; )->xxx;" - if ( - $is_semicolon_terminated - - # and 'cuddled parens' of the form: ")->pack(" - || ( - $terminal_type eq '(' - && $types_to_go[$ibeg] eq ')' - && ( $nesting_depth_to_go[$iend] + 1 == - $nesting_depth_to_go[$ibeg] ) - ) - ) - { - $adjust_indentation = 1; - } - - # TESTING: outdent something like '),' - if ( - $terminal_type eq ',' - - # allow just one character before the comma - && $i_terminal == $ibeg + 1 - - # requre LIST environment; otherwise, we may outdent too much -- - # this can happen in calls without parentheses (overload.t); - && $container_environment_to_go[$i_terminal] eq 'LIST' - ) - { - $adjust_indentation = 1; - } - - # undo continuation indentation of a terminal closing token if - # it is the last token before a level decrease. This will allow - # a closing token to line up with its opening counterpart, and - # avoids a indentation jump larger than 1 level. - if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ - && $i_terminal == $ibeg ) - { - my $ci = $ci_levels_to_go[$ibeg]; - my $lev = $levels_to_go[$ibeg]; - my $next_type = $types_to_go[ $ibeg + 1 ]; - my $i_next_nonblank = - ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); - if ( $i_next_nonblank <= $max_index_to_go - && $levels_to_go[$i_next_nonblank] < $lev ) - { - $adjust_indentation = 1; - } - } - - $default_adjust_indentation = $adjust_indentation; - - # Now modify default behavior according to user request: - # handle option to indent non-blocks of the form ); }; ]; - # But don't do special indentation to something like ')->pack(' - if ( !$block_type_to_go[$ibeg] ) { - my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; - if ( $cti == 1 ) { - if ( $i_terminal <= $ibeg + 1 - || $is_semicolon_terminated ) - { - $adjust_indentation = 2; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 2 ) { - if ($is_semicolon_terminated) { - $adjust_indentation = 3; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 3 ) { - $adjust_indentation = 3; - } - } - - # handle option to indent blocks - else { - if ( - $rOpts->{'indent-closing-brace'} - && ( - $i_terminal == $ibeg # isolated terminal '}' - || $is_semicolon_terminated - ) - ) # } xxxx ; - { - $adjust_indentation = 3; - } - } - } - - # if at ');', '};', '>;', and '];' of a terminal qw quote - elsif ($$rpatterns[0] =~ /^qb*;$/ - && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) - { - if ( $closing_token_indentation{$1} == 0 ) { - $adjust_indentation = 1; - } - else { - $adjust_indentation = 3; - } - } - - # if line begins with a ':', align it with any - # previous line leading with corresponding ? - elsif ( $types_to_go[$ibeg] eq ':' ) { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - if ($is_leading) { $adjust_indentation = 2; } - } - - ########################################################## - # Section 2: set indentation according to flag set above - # - # Select the indentation object to define leading - # whitespace. If we are outdenting something like '} } );' - # then we want to use one level below the last token - # ($i_terminal) in order to get it to fully outdent through - # all levels. - ########################################################## - my $indentation; - my $lev; - my $level_end = $levels_to_go[$iend]; - - if ( $adjust_indentation == 0 ) { - $indentation = $leading_spaces_to_go[$ibeg]; - $lev = $levels_to_go[$ibeg]; - } - elsif ( $adjust_indentation == 1 ) { - $indentation = $reduced_spaces_to_go[$i_terminal]; - $lev = $levels_to_go[$i_terminal]; - } - - # handle indented closing token which aligns with opening token - elsif ( $adjust_indentation == 2 ) { - - # handle option to align closing token with opening token - $lev = $levels_to_go[$ibeg]; - - # calculate spaces needed to align with opening token - my $space_count = - get_SPACES($opening_indentation) + $opening_offset; - - # Indent less than the previous line. - # - # Problem: For -lp we don't exactly know what it was if there - # were recoverable spaces sent to the aligner. A good solution - # would be to force a flush of the vertical alignment buffer, so - # that we would know. For now, this rule is used for -lp: - # - # When the last line did not start with a closing token we will - # be optimistic that the aligner will recover everything wanted. - # - # This rule will prevent us from breaking a hierarchy of closing - # tokens, and in a worst case will leave a closing paren too far - # indented, but this is better than frequently leaving it not - # indented enough. - my $last_spaces = get_SPACES($last_indentation_written); - if ( $last_leading_token !~ /^[\}\]\)]$/ ) { - $last_spaces += - get_RECOVERABLE_SPACES($last_indentation_written); - } - - # reset the indentation to the new space count if it works - # only options are all or none: nothing in-between looks good - $lev = $levels_to_go[$ibeg]; - if ( $space_count < $last_spaces ) { - if ($rOpts_line_up_parentheses) { - my $lev = $levels_to_go[$ibeg]; - $indentation = - new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); - } - else { - $indentation = $space_count; - } - } - - # revert to default if it doesnt work - else { - $space_count = leading_spaces_to_go($ibeg); - if ( $default_adjust_indentation == 0 ) { - $indentation = $leading_spaces_to_go[$ibeg]; - } - elsif ( $default_adjust_indentation == 1 ) { - $indentation = $reduced_spaces_to_go[$i_terminal]; - $lev = $levels_to_go[$i_terminal]; - } - } - } - - # Full indentaion of closing tokens (-icb and -icp or -cti=2) - else { - - # handle -icb (indented closing code block braces) - # Updated method for indented block braces: indent one full level if - # there is no continuation indentation. This will occur for major - # structures such as sub, if, else, but not for things like map - # blocks. - # - # Note: only code blocks without continuation indentation are - # handled here (if, else, unless, ..). In the following snippet, - # the terminal brace of the sort block will have continuation - # indentation as shown so it will not be handled by the coding - # here. We would have to undo the continuation indentation to do - # this, but it probably looks ok as is. This is a possible future - # update for semicolon terminated lines. - # - # if ($sortby eq 'date' or $sortby eq 'size') { - # @files = sort { - # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} - # or $a cmp $b - # } @files; - # } - # - if ( $block_type_to_go[$ibeg] - && $ci_levels_to_go[$i_terminal] == 0 ) - { - my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] ); - $indentation = $spaces + $rOpts_indent_columns; - - # NOTE: for -lp we could create a new indentation object, but - # there is probably no need to do it - } - - # handle -icp and any -icb block braces which fall through above - # test such as the 'sort' block mentioned above. - else { - - # There are currently two ways to handle -icp... - # One way is to use the indentation of the previous line: - # $indentation = $last_indentation_written; - - # The other way is to use the indentation that the previous line - # would have had if it hadn't been adjusted: - $indentation = $last_unadjusted_indentation; - - # Current method: use the minimum of the two. This avoids - # inconsistent indentation. - if ( get_SPACES($last_indentation_written) < - get_SPACES($indentation) ) - { - $indentation = $last_indentation_written; - } - } - - # use previous indentation but use own level - # to cause list to be flushed properly - $lev = $levels_to_go[$ibeg]; - } - - # remember indentation except for multi-line quotes, which get - # no indentation - unless ( $ibeg == 0 && $starting_in_quote ) { - $last_indentation_written = $indentation; - $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; - $last_leading_token = $tokens_to_go[$ibeg]; - } - - # be sure lines with leading closing tokens are not outdented more - # than the line which contained the corresponding opening token. - - ############################################################# - # updated per bug report in alex_bug.pl: we must not - # mess with the indentation of closing logical braces so - # we must treat something like '} else {' as if it were - # an isolated brace my $is_isolated_block_brace = ( - # $iend == $ibeg ) && $block_type_to_go[$ibeg]; - ############################################################# - my $is_isolated_block_brace = $block_type_to_go[$ibeg] - && ( $iend == $ibeg - || $is_if_elsif_else_unless_while_until_for_foreach{ - $block_type_to_go[$ibeg] } ); - - # only do this for a ':; which is aligned with its leading '?' - my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; - if ( defined($opening_indentation) - && !$is_isolated_block_brace - && !$is_unaligned_colon ) - { - if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { - $indentation = $opening_indentation; - } - } - - # remember the indentation of each line of this batch - push @{$rindentation_list}, $indentation; - - # outdent lines with certain leading tokens... - if ( - - # must be first word of this batch - $ibeg == 0 - - # and ... - && ( - - # certain leading keywords if requested - ( - $rOpts->{'outdent-keywords'} - && $types_to_go[$ibeg] eq 'k' - && $outdent_keyword{ $tokens_to_go[$ibeg] } - ) - - # or labels if requested - || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) - - # or static block comments if requested - || ( $types_to_go[$ibeg] eq '#' - && $rOpts->{'outdent-static-block-comments'} - && $is_static_block_comment ) - ) - ) - - { - my $space_count = leading_spaces_to_go($ibeg); - if ( $space_count > 0 ) { - $space_count -= $rOpts_continuation_indentation; - $is_outdented_line = 1; - if ( $space_count < 0 ) { $space_count = 0 } - - # do not promote a spaced static block comment to non-spaced; - # this is not normally necessary but could be for some - # unusual user inputs (such as -ci = -i) - if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { - $space_count = 1; - } - - if ($rOpts_line_up_parentheses) { - $indentation = - new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); - } - else { - $indentation = $space_count; - } - } - } - - return ( $indentation, $lev, $level_end, $terminal_type, - $is_semicolon_terminated, $is_outdented_line ); - } -} - -sub set_vertical_tightness_flags { - - my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; - - # Define vertical tightness controls for the nth line of a batch. - # We create an array of parameters which tell the vertical aligner - # if we should combine this line with the next line to achieve the - # desired vertical tightness. The array of parameters contains: - # - # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace - # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok - # if closing: spaces of padding to use - # [2] sequence number of container - # [3] valid flag: do not append if this flag is false. Will be - # true if appropriate -vt flag is set. Otherwise, Will be - # made true only for 2 line container in parens with -lp - # - # These flags are used by sub set_leading_whitespace in - # the vertical aligner - - my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; - - # For non-BLOCK tokens, we will need to examine the next line - # too, so we won't consider the last line. - if ( $n < $n_last_line ) { - - # see if last token is an opening token...not a BLOCK... - my $ibeg_next = $$ri_first[ $n + 1 ]; - my $token_end = $tokens_to_go[$iend]; - my $iend_next = $$ri_last[ $n + 1 ]; - if ( - $type_sequence_to_go[$iend] - && !$block_type_to_go[$iend] - && $is_opening_token{$token_end} - && ( - $opening_vertical_tightness{$token_end} > 0 - - # allow 2-line method call to be closed up - || ( $rOpts_line_up_parentheses - && $token_end eq '(' - && $iend > $ibeg - && $types_to_go[ $iend - 1 ] ne 'b' ) - ) - ) - { - - # avoid multiple jumps in nesting depth in one line if - # requested - my $ovt = $opening_vertical_tightness{$token_end}; - my $iend_next = $$ri_last[ $n + 1 ]; - unless ( - $ovt < 2 - && ( $nesting_depth_to_go[ $iend_next + 1 ] != - $nesting_depth_to_go[$ibeg_next] ) - ) - { - - # If -vt flag has not been set, mark this as invalid - # and aligner will validate it if it sees the closing paren - # within 2 lines. - my $valid_flag = $ovt; - @{$rvertical_tightness_flags} = - ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); - } - } - - # see if first token of next line is a closing token... - # ..and be sure this line does not have a side comment - my $token_next = $tokens_to_go[$ibeg_next]; - if ( $type_sequence_to_go[$ibeg_next] - && !$block_type_to_go[$ibeg_next] - && $is_closing_token{$token_next} - && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! - { - my $ovt = $opening_vertical_tightness{$token_next}; - my $cvt = $closing_vertical_tightness{$token_next}; - if ( - - # never append a trailing line like )->pack( - # because it will throw off later alignment - ( - $nesting_depth_to_go[$ibeg_next] == - $nesting_depth_to_go[ $iend_next + 1 ] + 1 - ) - && ( - $cvt == 2 - || ( - $container_environment_to_go[$ibeg_next] ne 'LIST' - && ( - $cvt == 1 - - # allow closing up 2-line method calls - || ( $rOpts_line_up_parentheses - && $token_next eq ')' ) - ) - ) - ) - ) - { - - # decide which trailing closing tokens to append.. - my $ok = 0; - if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } - else { - my $str = join( '', - @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); - - # append closing token if followed by comment or ';' - if ( $str =~ /^b?[#;]/ ) { $ok = 1 } - } - - if ($ok) { - my $valid_flag = $cvt; - @{$rvertical_tightness_flags} = ( - 2, - $tightness{$token_next} == 2 ? 0 : 1, - $type_sequence_to_go[$ibeg_next], $valid_flag, - ); - } - } - } - - # Opening Token Right - # If requested, move an isolated trailing opening token to the end of - # the previous line which ended in a comma. We could do this - # in sub recombine_breakpoints but that would cause problems - # with -lp formatting. The problem is that indentation will - # quickly move far to the right in nested expressions. By - # doing it after indentation has been set, we avoid changes - # to the indentation. Actual movement of the token takes place - # in sub write_leader_and_string. - if ( - $opening_token_right{ $tokens_to_go[$ibeg_next] } - - # previous line is not opening - # (use -sot to combine with it) - && !$is_opening_token{$token_end} - - # previous line ended in one of these - # (add other cases if necessary; '=>' and '.' are not necessary - ##&& ($is_opening_token{$token_end} || $token_end eq ',') - && !$block_type_to_go[$ibeg_next] - - # this is a line with just an opening token - && ( $iend_next == $ibeg_next - || $iend_next == $ibeg_next + 2 - && $types_to_go[$iend_next] eq '#' ) - - # looks bad if we align vertically with the wrong container - && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] - ) - { - my $valid_flag = 1; - my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; - @{$rvertical_tightness_flags} = - ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); - } - - # Stacking of opening and closing tokens - my $stackable; - my $token_beg_next = $tokens_to_go[$ibeg_next]; - - # patch to make something like 'qw(' behave like an opening paren - # (aran.t) - if ( $types_to_go[$ibeg_next] eq 'q' ) { - if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { - $token_beg_next = $1; - } - } - - if ( $is_closing_token{$token_end} - && $is_closing_token{$token_beg_next} ) - { - $stackable = $stack_closing_token{$token_beg_next} - unless ( $block_type_to_go[$ibeg_next] ) - ; # shouldn't happen; just checking - } - elsif ($is_opening_token{$token_end} - && $is_opening_token{$token_beg_next} ) - { - $stackable = $stack_opening_token{$token_beg_next} - unless ( $block_type_to_go[$ibeg_next] ) - ; # shouldn't happen; just checking - } - - if ($stackable) { - - my $is_semicolon_terminated; - if ( $n + 1 == $n_last_line ) { - my ( $terminal_type, $i_terminal ) = terminal_type( - \@types_to_go, \@block_type_to_go, - $ibeg_next, $iend_next - ); - $is_semicolon_terminated = $terminal_type eq ';' - && $nesting_depth_to_go[$iend_next] < - $nesting_depth_to_go[$ibeg_next]; - } - - # this must be a line with just an opening token - # or end in a semicolon - if ( - $is_semicolon_terminated - || ( $iend_next == $ibeg_next - || $iend_next == $ibeg_next + 2 - && $types_to_go[$iend_next] eq '#' ) - ) - { - my $valid_flag = 1; - my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; - @{$rvertical_tightness_flags} = - ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, - ); - } - } - } - - # Check for a last line with isolated opening BLOCK curly - elsif ($rOpts_block_brace_vertical_tightness - && $ibeg eq $iend - && $types_to_go[$iend] eq '{' - && $block_type_to_go[$iend] =~ - /$block_brace_vertical_tightness_pattern/o ) - { - @{$rvertical_tightness_flags} = - ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); - } - - # pack in the sequence numbers of the ends of this line - $rvertical_tightness_flags->[4] = get_seqno($ibeg); - $rvertical_tightness_flags->[5] = get_seqno($iend); - return $rvertical_tightness_flags; -} - -sub get_seqno { - - # get opening and closing sequence numbers of a token for the vertical - # aligner. Assign qw quotes a value to allow qw opening and closing tokens - # to be treated somewhat like opening and closing tokens for stacking - # tokens by the vertical aligner. - my ($ii) = @_; - my $seqno = $type_sequence_to_go[$ii]; - if ( $types_to_go[$ii] eq 'q' ) { - my $SEQ_QW = -1; - if ( $ii > 0 ) { - $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); - } - else { - if ( !$ending_in_quote ) { - $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); - } - } - } - return ($seqno); -} - -{ - my %is_vertical_alignment_type; - my %is_vertical_alignment_keyword; - - BEGIN { - - @_ = qw# - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - { ? : => =~ && || // ~~ !~~ - #; - @is_vertical_alignment_type{@_} = (1) x scalar(@_); - - @_ = qw(if unless and or err eq ne for foreach while until); - @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); - } - - sub set_vertical_alignment_markers { - - # This routine takes the first step toward vertical alignment of the - # lines of output text. It looks for certain tokens which can serve as - # vertical alignment markers (such as an '='). - # - # Method: We look at each token $i in this output batch and set - # $matching_token_to_go[$i] equal to those tokens at which we would - # accept vertical alignment. - - # nothing to do if we aren't allowed to change whitespace - if ( !$rOpts_add_whitespace ) { - for my $i ( 0 .. $max_index_to_go ) { - $matching_token_to_go[$i] = ''; - } - return; - } - - my ( $ri_first, $ri_last ) = @_; - - # remember the index of last nonblank token before any sidecomment - my $i_terminal = $max_index_to_go; - if ( $types_to_go[$i_terminal] eq '#' ) { - if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { - if ( $i_terminal > 0 ) { --$i_terminal } - } - } - - # look at each line of this batch.. - my $last_vertical_alignment_before_index; - my $vert_last_nonblank_type; - my $vert_last_nonblank_token; - my $vert_last_nonblank_block_type; - my $max_line = @$ri_first - 1; - my ( $i, $type, $token, $block_type, $alignment_type ); - my ( $ibeg, $iend, $line ); - - foreach $line ( 0 .. $max_line ) { - $ibeg = $$ri_first[$line]; - $iend = $$ri_last[$line]; - $last_vertical_alignment_before_index = -1; - $vert_last_nonblank_type = ''; - $vert_last_nonblank_token = ''; - $vert_last_nonblank_block_type = ''; - - # look at each token in this output line.. - foreach $i ( $ibeg .. $iend ) { - $alignment_type = ''; - $type = $types_to_go[$i]; - $block_type = $block_type_to_go[$i]; - $token = $tokens_to_go[$i]; - - # check for flag indicating that we should not align - # this token - if ( $matching_token_to_go[$i] ) { - $matching_token_to_go[$i] = ''; - next; - } - - #-------------------------------------------------------- - # First see if we want to align BEFORE this token - #-------------------------------------------------------- - - # The first possible token that we can align before - # is index 2 because: 1) it doesn't normally make sense to - # align before the first token and 2) the second - # token must be a blank if we are to align before - # the third - if ( $i < $ibeg + 2 ) { } - - # must follow a blank token - elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } - - # align a side comment -- - elsif ( $type eq '#' ) { - - unless ( - - # it is a static side comment - ( - $rOpts->{'static-side-comments'} - && $token =~ /$static_side_comment_pattern/o - ) - - # or a closing side comment - || ( $vert_last_nonblank_block_type - && $token =~ - /$closing_side_comment_prefix_pattern/o ) - ) - { - $alignment_type = $type; - } ## Example of a static side comment - } - - # otherwise, do not align two in a row to create a - # blank field - elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } - - # align before one of these keywords - # (within a line, since $i>1) - elsif ( $type eq 'k' ) { - - # /^(if|unless|and|or|eq|ne)$/ - if ( $is_vertical_alignment_keyword{$token} ) { - $alignment_type = $token; - } - } - - # align before one of these types.. - # Note: add '.' after new vertical aligner is operational - elsif ( $is_vertical_alignment_type{$type} ) { - $alignment_type = $token; - - # Do not align a terminal token. Although it might - # occasionally look ok to do this, it has been found to be - # a good general rule. The main problems are: - # (1) that the terminal token (such as an = or :) might get - # moved far to the right where it is hard to see because - # nothing follows it, and - # (2) doing so may prevent other good alignments. - if ( $i == $iend || $i >= $i_terminal ) { - $alignment_type = ""; - } - - # Do not align leading ': (' or '. ('. This would prevent - # alignment in something like the following: - # $extra_space .= - # ( $input_line_number < 10 ) ? " " - # : ( $input_line_number < 100 ) ? " " - # : ""; - # or - # $code = - # ( $case_matters ? $accessor : " lc($accessor) " ) - # . ( $yesno ? " eq " : " ne " ) - if ( $i == $ibeg + 2 - && $types_to_go[$ibeg] =~ /^[\.\:]$/ - && $types_to_go[ $i - 1 ] eq 'b' ) - { - $alignment_type = ""; - } - - # For a paren after keyword, only align something like this: - # if ( $a ) { &a } - # elsif ( $b ) { &b } - if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { - $alignment_type = "" - unless $vert_last_nonblank_token =~ - /^(if|unless|elsif)$/; - } - - # be sure the alignment tokens are unique - # This didn't work well: reason not determined - # if ($token ne $type) {$alignment_type .= $type} - } - - # NOTE: This is deactivated because it causes the previous - # if/elsif alignment to fail - #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) - #{ $alignment_type = $type; } - - if ($alignment_type) { - $last_vertical_alignment_before_index = $i; - } - - #-------------------------------------------------------- - # Next see if we want to align AFTER the previous nonblank - #-------------------------------------------------------- - - # We want to line up ',' and interior ';' tokens, with the added - # space AFTER these tokens. (Note: interior ';' is included - # because it may occur in short blocks). - if ( - - # we haven't already set it - !$alignment_type - - # and its not the first token of the line - && ( $i > $ibeg ) - - # and it follows a blank - && $types_to_go[ $i - 1 ] eq 'b' - - # and previous token IS one of these: - && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) - - # and it's NOT one of these - && ( $type !~ /^[b\#\)\]\}]$/ ) - - # then go ahead and align - ) - - { - $alignment_type = $vert_last_nonblank_type; - } - - #-------------------------------------------------------- - # then store the value - #-------------------------------------------------------- - $matching_token_to_go[$i] = $alignment_type; - if ( $type ne 'b' ) { - $vert_last_nonblank_type = $type; - $vert_last_nonblank_token = $token; - $vert_last_nonblank_block_type = $block_type; - } - } - } - } -} - -sub terminal_type { - - # returns type of last token on this line (terminal token), as follows: - # returns # for a full-line comment - # returns ' ' for a blank line - # otherwise returns final token type - - my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; - - # check for full-line comment.. - if ( $$rtype[$ibeg] eq '#' ) { - return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; - } - else { - - # start at end and walk bakwards.. - for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { - - # skip past any side comment and blanks - next if ( $$rtype[$i] eq 'b' ); - next if ( $$rtype[$i] eq '#' ); - - # found it..make sure it is a BLOCK termination, - # but hide a terminal } after sort/grep/map because it is not - # necessarily the end of the line. (terminal.t) - my $terminal_type = $$rtype[$i]; - if ( - $terminal_type eq '}' - && ( !$$rblock_type[$i] - || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) ) - ) - { - $terminal_type = 'b'; - } - return wantarray ? ( $terminal_type, $i ) : $terminal_type; - } - - # empty line - return wantarray ? ( ' ', $ibeg ) : ' '; - } -} - -{ - my %is_good_keyword_breakpoint; - my %is_lt_gt_le_ge; - - sub set_bond_strengths { - - BEGIN { - - @_ = qw(if unless while until for foreach); - @is_good_keyword_breakpoint{@_} = (1) x scalar(@_); - - @_ = qw(lt gt le ge); - @is_lt_gt_le_ge{@_} = (1) x scalar(@_); - - ############################################################### - # NOTE: NO_BREAK's set here are HINTS which may not be honored; - # essential NO_BREAKS's must be enforced in section 2, below. - ############################################################### - - # adding NEW_TOKENS: add a left and right bond strength by - # mimmicking what is done for an existing token type. You - # can skip this step at first and take the default, then - # tweak later to get desired results. - - # The bond strengths should roughly follow precenence order where - # possible. If you make changes, please check the results very - # carefully on a variety of scripts. - - # no break around possible filehandle - $left_bond_strength{'Z'} = NO_BREAK; - $right_bond_strength{'Z'} = NO_BREAK; - - # never put a bare word on a new line: - # example print (STDERR, "bla"); will fail with break after ( - $left_bond_strength{'w'} = NO_BREAK; - - # blanks always have infinite strength to force breaks after real tokens - $right_bond_strength{'b'} = NO_BREAK; - - # try not to break on exponentation - @_ = qw" ** .. ... <=> "; - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = (STRONG) x scalar(@_); - - # The comma-arrow has very low precedence but not a good break point - $left_bond_strength{'=>'} = NO_BREAK; - $right_bond_strength{'=>'} = NOMINAL; - - # ok to break after label - $left_bond_strength{'J'} = NO_BREAK; - $right_bond_strength{'J'} = NOMINAL; - $left_bond_strength{'j'} = STRONG; - $right_bond_strength{'j'} = STRONG; - $left_bond_strength{'A'} = STRONG; - $right_bond_strength{'A'} = STRONG; - - $left_bond_strength{'->'} = STRONG; - $right_bond_strength{'->'} = VERY_STRONG; - - # breaking AFTER modulus operator is ok: - @_ = qw" % "; - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = - ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); - - # Break AFTER math operators * and / - @_ = qw" * / x "; - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = (NOMINAL) x scalar(@_); - - # Break AFTER weakest math operators + and - - # Make them weaker than * but a bit stronger than '.' - @_ = qw" + - "; - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = - ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); - - # breaking BEFORE these is just ok: - @_ = qw" >> << "; - @right_bond_strength{@_} = (STRONG) x scalar(@_); - @left_bond_strength{@_} = (NOMINAL) x scalar(@_); - - # breaking before the string concatenation operator seems best - # because it can be hard to see at the end of a line - $right_bond_strength{'.'} = STRONG; - $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; - - @_ = qw"} ] ) "; - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = (NOMINAL) x scalar(@_); - - # make these a little weaker than nominal so that they get - # favored for end-of-line characters - @_ = qw"!= == =~ !~ ~~ !~~"; - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = - ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); - - # break AFTER these - @_ = qw" < > | & >= <="; - @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); - @right_bond_strength{@_} = - ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); - - # breaking either before or after a quote is ok - # but bias for breaking before a quote - $left_bond_strength{'Q'} = NOMINAL; - $right_bond_strength{'Q'} = NOMINAL + 0.02; - $left_bond_strength{'q'} = NOMINAL; - $right_bond_strength{'q'} = NOMINAL; - - # starting a line with a keyword is usually ok - $left_bond_strength{'k'} = NOMINAL; - - # we usually want to bond a keyword strongly to what immediately - # follows, rather than leaving it stranded at the end of a line - $right_bond_strength{'k'} = STRONG; - - $left_bond_strength{'G'} = NOMINAL; - $right_bond_strength{'G'} = STRONG; - - # it is good to break AFTER various assignment operators - @_ = qw( - = **= += *= &= <<= &&= - -= /= |= >>= ||= //= - .= %= ^= - x= - ); - @left_bond_strength{@_} = (STRONG) x scalar(@_); - @right_bond_strength{@_} = - ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); - - # break BEFORE '&&' and '||' and '//' - # set strength of '||' to same as '=' so that chains like - # $a = $b || $c || $d will break before the first '||' - $right_bond_strength{'||'} = NOMINAL; - $left_bond_strength{'||'} = $right_bond_strength{'='}; - - # same thing for '//' - $right_bond_strength{'//'} = NOMINAL; - $left_bond_strength{'//'} = $right_bond_strength{'='}; - - # set strength of && a little higher than || - $right_bond_strength{'&&'} = NOMINAL; - $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; - - $left_bond_strength{';'} = VERY_STRONG; - $right_bond_strength{';'} = VERY_WEAK; - $left_bond_strength{'f'} = VERY_STRONG; - - # make right strength of for ';' a little less than '=' - # to make for contents break after the ';' to avoid this: - # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += - # $number_of_fields ) - # and make it weaker than ',' and 'and' too - $right_bond_strength{'f'} = VERY_WEAK - 0.03; - - # The strengths of ?/: should be somewhere between - # an '=' and a quote (NOMINAL), - # make strength of ':' slightly less than '?' to help - # break long chains of ? : after the colons - $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; - $right_bond_strength{':'} = NO_BREAK; - $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; - $right_bond_strength{'?'} = NO_BREAK; - - $left_bond_strength{','} = VERY_STRONG; - $right_bond_strength{','} = VERY_WEAK; - - # Set bond strengths of certain keywords - # make 'or', 'err', 'and' slightly weaker than a ',' - $left_bond_strength{'and'} = VERY_WEAK - 0.01; - $left_bond_strength{'or'} = VERY_WEAK - 0.02; - $left_bond_strength{'err'} = VERY_WEAK - 0.02; - $left_bond_strength{'xor'} = NOMINAL; - $right_bond_strength{'and'} = NOMINAL; - $right_bond_strength{'or'} = NOMINAL; - $right_bond_strength{'err'} = NOMINAL; - $right_bond_strength{'xor'} = STRONG; - } - - # patch-its always ok to break at end of line - $nobreak_to_go[$max_index_to_go] = 0; - - # adding a small 'bias' to strengths is a simple way to make a line - # break at the first of a sequence of identical terms. For example, - # to force long string of conditional operators to break with - # each line ending in a ':', we can add a small number to the bond - # strength of each ':' - my $colon_bias = 0; - my $amp_bias = 0; - my $bar_bias = 0; - my $and_bias = 0; - my $or_bias = 0; - my $dot_bias = 0; - my $f_bias = 0; - my $code_bias = -.01; - my $type = 'b'; - my $token = ' '; - my $last_type; - my $last_nonblank_type = $type; - my $last_nonblank_token = $token; - my $delta_bias = 0.0001; - my $list_str = $left_bond_strength{'?'}; - - my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, - $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, - ); - - # preliminary loop to compute bond strengths - for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { - $last_type = $type; - if ( $type ne 'b' ) { - $last_nonblank_type = $type; - $last_nonblank_token = $token; - } - $type = $types_to_go[$i]; - - # strength on both sides of a blank is the same - if ( $type eq 'b' && $last_type ne 'b' ) { - $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; - next; - } - - $token = $tokens_to_go[$i]; - $block_type = $block_type_to_go[$i]; - $i_next = $i + 1; - $next_type = $types_to_go[$i_next]; - $next_token = $tokens_to_go[$i_next]; - $total_nesting_depth = $nesting_depth_to_go[$i_next]; - $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - # Some token chemistry... The decision about where to break a - # line depends upon a "bond strength" between tokens. The LOWER - # the bond strength, the MORE likely a break. The strength - # values are based on trial-and-error, and need to be tweaked - # occasionally to get desired results. Things to keep in mind - # are: - # 1. relative strengths are important. small differences - # in strengths can make big formatting differences. - # 2. each indentation level adds one unit of bond strength - # 3. a value of NO_BREAK makes an unbreakable bond - # 4. a value of VERY_WEAK is the strength of a ',' - # 5. values below NOMINAL are considered ok break points - # 6. values above NOMINAL are considered poor break points - # We are computing the strength of the bond between the current - # token and the NEXT token. - my $bond_str = VERY_STRONG; # a default, high strength - - #--------------------------------------------------------------- - # section 1: - # use minimum of left and right bond strengths if defined; - # digraphs and trigraphs like to break on their left - #--------------------------------------------------------------- - my $bsr = $right_bond_strength{$type}; - - if ( !defined($bsr) ) { - - if ( $is_digraph{$type} || $is_trigraph{$type} ) { - $bsr = STRONG; - } - else { - $bsr = VERY_STRONG; - } - } - - # define right bond strengths of certain keywords - if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { - $bsr = $right_bond_strength{$token}; - } - elsif ( $token eq 'ne' or $token eq 'eq' ) { - $bsr = NOMINAL; - } - my $bsl = $left_bond_strength{$next_nonblank_type}; - - # set terminal bond strength to the nominal value - # this will cause good preceding breaks to be retained - if ( $i_next_nonblank > $max_index_to_go ) { - $bsl = NOMINAL; - } - - if ( !defined($bsl) ) { - - if ( $is_digraph{$next_nonblank_type} - || $is_trigraph{$next_nonblank_type} ) - { - $bsl = WEAK; - } - else { - $bsl = VERY_STRONG; - } - } - - # define right bond strengths of certain keywords - if ( $next_nonblank_type eq 'k' - && defined( $left_bond_strength{$next_nonblank_token} ) ) - { - $bsl = $left_bond_strength{$next_nonblank_token}; - } - elsif ($next_nonblank_token eq 'ne' - or $next_nonblank_token eq 'eq' ) - { - $bsl = NOMINAL; - } - elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { - $bsl = 0.9 * NOMINAL + 0.1 * STRONG; - } - - # Note: it might seem that we would want to keep a NO_BREAK if - # either token has this value. This didn't work, because in an - # arrow list, it prevents the comma from separating from the - # following bare word (which is probably quoted by its arrow). - # So necessary NO_BREAK's have to be handled as special cases - # in the final section. - $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; - my $bond_str_1 = $bond_str; - - #--------------------------------------------------------------- - # section 2: - # special cases - #--------------------------------------------------------------- - - # allow long lines before final { in an if statement, as in: - # if (.......... - # ..........) - # { - # - # Otherwise, the line before the { tends to be too short. - if ( $type eq ')' ) { - if ( $next_nonblank_type eq '{' ) { - $bond_str = VERY_WEAK + 0.03; - } - } - - elsif ( $type eq '(' ) { - if ( $next_nonblank_type eq '{' ) { - $bond_str = NOMINAL; - } - } - - # break on something like '} (', but keep this stronger than a ',' - # example is in 'howe.pl' - elsif ( $type eq 'R' or $type eq '}' ) { - if ( $next_nonblank_type eq '(' ) { - $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; - } - } - - #----------------------------------------------------------------- - # adjust bond strength bias - #----------------------------------------------------------------- - - # TESTING: add any bias set by sub scan_list at old comma - # break points. - elsif ( $type eq ',' ) { - $bond_str += $bond_strength_to_go[$i]; - } - - elsif ( $type eq 'f' ) { - $bond_str += $f_bias; - $f_bias += $delta_bias; - } - - # in long ?: conditionals, bias toward just one set per line (colon.t) - elsif ( $type eq ':' ) { - if ( !$want_break_before{$type} ) { - $bond_str += $colon_bias; - $colon_bias += $delta_bias; - } - } - - if ( $next_nonblank_type eq ':' - && $want_break_before{$next_nonblank_type} ) - { - $bond_str += $colon_bias; - $colon_bias += $delta_bias; - } - - # if leading '.' is used, align all but 'short' quotes; - # the idea is to not place something like "\n" on a single line. - elsif ( $next_nonblank_type eq '.' ) { - if ( $want_break_before{'.'} ) { - unless ( - $last_nonblank_type eq '.' - && ( - length($token) <= - $rOpts_short_concatenation_item_length ) - && ( $token !~ /^[\)\]\}]$/ ) - ) - { - $dot_bias += $delta_bias; - } - $bond_str += $dot_bias; - } - } - elsif ($next_nonblank_type eq '&&' - && $want_break_before{$next_nonblank_type} ) - { - $bond_str += $amp_bias; - $amp_bias += $delta_bias; - } - elsif ($next_nonblank_type eq '||' - && $want_break_before{$next_nonblank_type} ) - { - $bond_str += $bar_bias; - $bar_bias += $delta_bias; - } - elsif ( $next_nonblank_type eq 'k' ) { - - if ( $next_nonblank_token eq 'and' - && $want_break_before{$next_nonblank_token} ) - { - $bond_str += $and_bias; - $and_bias += $delta_bias; - } - elsif ($next_nonblank_token =~ /^(or|err)$/ - && $want_break_before{$next_nonblank_token} ) - { - $bond_str += $or_bias; - $or_bias += $delta_bias; - } - - # FIXME: needs more testing - elsif ( $is_keyword_returning_list{$next_nonblank_token} ) { - $bond_str = $list_str if ( $bond_str > $list_str ); - } - elsif ( $token eq 'err' - && !$want_break_before{$token} ) - { - $bond_str += $or_bias; - $or_bias += $delta_bias; - } - } - - if ( $type eq ':' - && !$want_break_before{$type} ) - { - $bond_str += $colon_bias; - $colon_bias += $delta_bias; - } - elsif ( $type eq '&&' - && !$want_break_before{$type} ) - { - $bond_str += $amp_bias; - $amp_bias += $delta_bias; - } - elsif ( $type eq '||' - && !$want_break_before{$type} ) - { - $bond_str += $bar_bias; - $bar_bias += $delta_bias; - } - elsif ( $type eq 'k' ) { - - if ( $token eq 'and' - && !$want_break_before{$token} ) - { - $bond_str += $and_bias; - $and_bias += $delta_bias; - } - elsif ( $token eq 'or' - && !$want_break_before{$token} ) - { - $bond_str += $or_bias; - $or_bias += $delta_bias; - } - } - - # keep matrix and hash indices together - # but make them a little below STRONG to allow breaking open - # something like {'some-word'}{'some-very-long-word'} at the }{ - # (bracebrk.t) - if ( ( $type eq ']' or $type eq 'R' ) - && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) - ) - { - $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; - } - - if ( $next_nonblank_token =~ /^->/ ) { - - # increase strength to the point where a break in the following - # will be after the opening paren rather than at the arrow: - # $a->$b($c); - if ( $type eq 'i' ) { - $bond_str = 1.45 * STRONG; - } - - elsif ( $type =~ /^[\)\]\}R]$/ ) { - $bond_str = 0.1 * STRONG + 0.9 * NOMINAL; - } - - # otherwise make strength before an '->' a little over a '+' - else { - if ( $bond_str <= NOMINAL ) { - $bond_str = NOMINAL + 0.01; - } - } - } - - if ( $token eq ')' && $next_nonblank_token eq '[' ) { - $bond_str = 0.2 * STRONG + 0.8 * NOMINAL; - } - - # map1.t -- correct for a quirk in perl - if ( $token eq '(' - && $next_nonblank_type eq 'i' - && $last_nonblank_type eq 'k' - && $is_sort_map_grep{$last_nonblank_token} ) - - # /^(sort|map|grep)$/ ) - { - $bond_str = NO_BREAK; - } - - # extrude.t: do not break before paren at: - # -l pid_filename( - if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { - $bond_str = NO_BREAK; - } - - # good to break after end of code blocks - if ( $type eq '}' && $block_type ) { - - $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; - $code_bias += $delta_bias; - } - - if ( $type eq 'k' ) { - - # allow certain control keywords to stand out - if ( $next_nonblank_type eq 'k' - && $is_last_next_redo_return{$token} ) - { - $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; - } - -# Don't break after keyword my. This is a quick fix for a -# rare problem with perl. An example is this line from file -# Container.pm: -# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) - - if ( $token eq 'my' ) { - $bond_str = NO_BREAK; - } - - } - - # good to break before 'if', 'unless', etc - if ( $is_if_brace_follower{$next_nonblank_token} ) { - $bond_str = VERY_WEAK; - } - - if ( $next_nonblank_type eq 'k' ) { - - # keywords like 'unless', 'if', etc, within statements - # make good breaks - if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { - $bond_str = VERY_WEAK / 1.05; - } - } - - # try not to break before a comma-arrow - elsif ( $next_nonblank_type eq '=>' ) { - if ( $bond_str < STRONG ) { $bond_str = STRONG } - } - - #---------------------------------------------------------------------- - # only set NO_BREAK's from here on - #---------------------------------------------------------------------- - if ( $type eq 'C' or $type eq 'U' ) { - - # use strict requires that bare word and => not be separated - if ( $next_nonblank_type eq '=>' ) { - $bond_str = NO_BREAK; - } - - # Never break between a bareword and a following paren because - # perl may give an error. For example, if a break is placed - # between 'to_filehandle' and its '(' the following line will - # give a syntax error [Carp.pm]: my( $no) =fileno( - # to_filehandle( $in)) ; - if ( $next_nonblank_token eq '(' ) { - $bond_str = NO_BREAK; - } - } - - # use strict requires that bare word within braces not start new line - elsif ( $type eq 'L' ) { - - if ( $next_nonblank_type eq 'w' ) { - $bond_str = NO_BREAK; - } - } - - # in older version of perl, use strict can cause problems with - # breaks before bare words following opening parens. For example, - # this will fail under older versions if a break is made between - # '(' and 'MAIL': - # use strict; - # open( MAIL, "a long filename or command"); - # close MAIL; - elsif ( $type eq '{' ) { - - if ( $token eq '(' && $next_nonblank_type eq 'w' ) { - - # but it's fine to break if the word is followed by a '=>' - # or if it is obviously a sub call - my $i_next_next_nonblank = $i_next_nonblank + 1; - my $next_next_type = $types_to_go[$i_next_next_nonblank]; - if ( $next_next_type eq 'b' - && $i_next_nonblank < $max_index_to_go ) - { - $i_next_next_nonblank++; - $next_next_type = $types_to_go[$i_next_next_nonblank]; - } - - ##if ( $next_next_type ne '=>' ) { - # these are ok: '->xxx', '=>', '(' - - # We'll check for an old breakpoint and keep a leading - # bareword if it was that way in the input file. - # Presumably it was ok that way. For example, the - # following would remain unchanged: - # - # @months = ( - # January, February, March, April, - # May, June, July, August, - # September, October, November, December, - # ); - # - # This should be sufficient: - if ( !$old_breakpoint_to_go[$i] - && ( $next_next_type eq ',' || $next_next_type eq '}' ) - ) - { - $bond_str = NO_BREAK; - } - } - } - - elsif ( $type eq 'w' ) { - - if ( $next_nonblank_type eq 'R' ) { - $bond_str = NO_BREAK; - } - - # use strict requires that bare word and => not be separated - if ( $next_nonblank_type eq '=>' ) { - $bond_str = NO_BREAK; - } - } - - # in fact, use strict hates bare words on any new line. For - # example, a break before the underscore here provokes the - # wrath of use strict: - # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { - elsif ( $type eq 'F' ) { - $bond_str = NO_BREAK; - } - - # use strict does not allow separating type info from trailing { } - # testfile is readmail.pl - elsif ( $type eq 't' or $type eq 'i' ) { - - if ( $next_nonblank_type eq 'L' ) { - $bond_str = NO_BREAK; - } - } - - # Do not break between a possible filehandle and a ? or / and do - # not introduce a break after it if there is no blank - # (extrude.t) - elsif ( $type eq 'Z' ) { - - # dont break.. - if ( - - # if there is no blank and we do not want one. Examples: - # print $x++ # do not break after $x - # print HTML"HELLO" # break ok after HTML - ( - $next_type ne 'b' - && defined( $want_left_space{$next_type} ) - && $want_left_space{$next_type} == WS_NO - ) - - # or we might be followed by the start of a quote - || $next_nonblank_type =~ /^[\/\?]$/ - ) - { - $bond_str = NO_BREAK; - } - } - - # Do not break before a possible file handle - if ( $next_nonblank_type eq 'Z' ) { - $bond_str = NO_BREAK; - } - - # As a defensive measure, do not break between a '(' and a - # filehandle. In some cases, this can cause an error. For - # example, the following program works: - # my $msg="hi!\n"; - # print - # ( STDOUT - # $msg - # ); - # - # But this program fails: - # my $msg="hi!\n"; - # print - # ( - # STDOUT - # $msg - # ); - # - # This is normally only a problem with the 'extrude' option - if ( $next_nonblank_type eq 'Y' && $token eq '(' ) { - $bond_str = NO_BREAK; - } - - # Breaking before a ++ can cause perl to guess wrong. For - # example the following line will cause a syntax error - # with -extrude if we break between '$i' and '++' [fixstyle2] - # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); - elsif ( $next_nonblank_type eq '++' ) { - $bond_str = NO_BREAK; - } - - # Breaking before a ? before a quote can cause trouble if - # they are not separated by a blank. - # Example: a syntax error occurs if you break before the ? here - # my$logic=join$all?' && ':' || ',@regexps; - # From: Professional_Perl_Programming_Code/multifind.pl - elsif ( $next_nonblank_type eq '?' ) { - $bond_str = NO_BREAK - if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); - } - - # Breaking before a . followed by a number - # can cause trouble if there is no intervening space - # Example: a syntax error occurs if you break before the .2 here - # $str .= pack($endian.2, ensurrogate($ord)); - # From: perl58/Unicode.pm - elsif ( $next_nonblank_type eq '.' ) { - $bond_str = NO_BREAK - if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); - } - - # patch to put cuddled elses back together when on multiple - # lines, as in: } \n else \n { \n - if ($rOpts_cuddled_else) { - - if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) - || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) - { - $bond_str = NO_BREAK; - } - } - - # keep '}' together with ';' - if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { - $bond_str = NO_BREAK; - } - - # never break between sub name and opening paren - if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { - $bond_str = NO_BREAK; - } - - #--------------------------------------------------------------- - # section 3: - # now take nesting depth into account - #--------------------------------------------------------------- - # final strength incorporates the bond strength and nesting depth - my $strength; - - if ( defined($bond_str) && !$nobreak_to_go[$i] ) { - if ( $total_nesting_depth > 0 ) { - $strength = $bond_str + $total_nesting_depth; - } - else { - $strength = $bond_str; - } - } - else { - $strength = NO_BREAK; - } - - # always break after side comment - if ( $type eq '#' ) { $strength = 0 } - - $bond_strength_to_go[$i] = $strength; - - FORMATTER_DEBUG_FLAG_BOND && do { - my $str = substr( $token, 0, 15 ); - $str .= ' ' x ( 16 - length($str) ); - print -"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; - }; - } - } - -} - -sub pad_array_to_go { - - # to simplify coding in scan_list and set_bond_strengths, it helps - # to create some extra blank tokens at the end of the arrays - $tokens_to_go[ $max_index_to_go + 1 ] = ''; - $tokens_to_go[ $max_index_to_go + 2 ] = ''; - $types_to_go[ $max_index_to_go + 1 ] = 'b'; - $types_to_go[ $max_index_to_go + 2 ] = 'b'; - $nesting_depth_to_go[ $max_index_to_go + 1 ] = - $nesting_depth_to_go[$max_index_to_go]; - - # /^[R\}\)\]]$/ - if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { - if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { - - # shouldn't happen: - unless ( get_saw_brace_error() ) { - warning( -"Program bug in scan_list: hit nesting error which should have been caught\n" - ); - report_definite_bug(); - } - } - else { - $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; - } - } - - # /^[L\{\(\[]$/ - elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { - $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; - } -} - -{ # begin scan_list - - my ( - $block_type, $current_depth, - $depth, $i, - $i_last_nonblank_token, $last_colon_sequence_number, - $last_nonblank_token, $last_nonblank_type, - $last_old_breakpoint_count, $minimum_depth, - $next_nonblank_block_type, $next_nonblank_token, - $next_nonblank_type, $old_breakpoint_count, - $starting_breakpoint_count, $starting_depth, - $token, $type, - $type_sequence, - ); - - my ( - @breakpoint_stack, @breakpoint_undo_stack, - @comma_index, @container_type, - @identifier_count_stack, @index_before_arrow, - @interrupted_list, @item_count_stack, - @last_comma_index, @last_dot_index, - @last_nonblank_type, @old_breakpoint_count_stack, - @opening_structure_index_stack, @rfor_semicolon_list, - @has_old_logical_breakpoints, @rand_or_list, - @i_equals, - ); - - # routine to define essential variables when we go 'up' to - # a new depth - sub check_for_new_minimum_depth { - my $depth = shift; - if ( $depth < $minimum_depth ) { - - $minimum_depth = $depth; - - # these arrays need not retain values between calls - $breakpoint_stack[$depth] = $starting_breakpoint_count; - $container_type[$depth] = ""; - $identifier_count_stack[$depth] = 0; - $index_before_arrow[$depth] = -1; - $interrupted_list[$depth] = 1; - $item_count_stack[$depth] = 0; - $last_nonblank_type[$depth] = ""; - $opening_structure_index_stack[$depth] = -1; - - $breakpoint_undo_stack[$depth] = undef; - $comma_index[$depth] = undef; - $last_comma_index[$depth] = undef; - $last_dot_index[$depth] = undef; - $old_breakpoint_count_stack[$depth] = undef; - $has_old_logical_breakpoints[$depth] = 0; - $rand_or_list[$depth] = []; - $rfor_semicolon_list[$depth] = []; - $i_equals[$depth] = -1; - - # these arrays must retain values between calls - if ( !defined( $has_broken_sublist[$depth] ) ) { - $dont_align[$depth] = 0; - $has_broken_sublist[$depth] = 0; - $want_comma_break[$depth] = 0; - } - } - } - - # routine to decide which commas to break at within a container; - # returns: - # $bp_count = number of comma breakpoints set - # $do_not_break_apart = a flag indicating if container need not - # be broken open - sub set_comma_breakpoints { - - my $dd = shift; - my $bp_count = 0; - my $do_not_break_apart = 0; - - # anything to do? - if ( $item_count_stack[$dd] ) { - - # handle commas not in containers... - if ( $dont_align[$dd] ) { - do_uncontained_comma_breaks($dd); - } - - # handle commas within containers... - else { - my $fbc = $forced_breakpoint_count; - - # always open comma lists not preceded by keywords, - # barewords, identifiers (that is, anything that doesn't - # look like a function call) - my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; - - set_comma_breakpoints_do( - $dd, - $opening_structure_index_stack[$dd], - $i, - $item_count_stack[$dd], - $identifier_count_stack[$dd], - $comma_index[$dd], - $next_nonblank_type, - $container_type[$dd], - $interrupted_list[$dd], - \$do_not_break_apart, - $must_break_open, - ); - $bp_count = $forced_breakpoint_count - $fbc; - $do_not_break_apart = 0 if $must_break_open; - } - } - return ( $bp_count, $do_not_break_apart ); - } - - sub do_uncontained_comma_breaks { - - # Handle commas not in containers... - # This is a catch-all routine for commas that we - # don't know what to do with because the don't fall - # within containers. We will bias the bond strength - # to break at commas which ended lines in the input - # file. This usually works better than just trying - # to put as many items on a line as possible. A - # downside is that if the input file is garbage it - # won't work very well. However, the user can always - # prevent following the old breakpoints with the - # -iob flag. - my $dd = shift; - my $bias = -.01; - foreach my $ii ( @{ $comma_index[$dd] } ) { - if ( $old_breakpoint_to_go[$ii] ) { - $bond_strength_to_go[$ii] = $bias; - - # reduce bias magnitude to force breaks in order - $bias *= 0.99; - } - } - - # Also put a break before the first comma if - # (1) there was a break there in the input, and - # (2) that was exactly one previous break in the input - # - # For example, we will follow the user and break after - # 'print' in this snippet: - # print - # "conformability (Not the same dimension)\n", - # "\t", $have, " is ", text_unit($hu), "\n", - # "\t", $want, " is ", text_unit($wu), "\n", - # ; - my $i_first_comma = $comma_index[$dd]->[0]; - if ( $old_breakpoint_to_go[$i_first_comma] ) { - my $level_comma = $levels_to_go[$i_first_comma]; - my $ibreak = -1; - my $obp_count = 0; - for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { - if ( $old_breakpoint_to_go[$ii] ) { - $obp_count++; - last if ( $obp_count > 1 ); - $ibreak = $ii - if ( $levels_to_go[$ii] == $level_comma ); - } - } - if ( $ibreak >= 0 && $obp_count == 1 ) { - set_forced_breakpoint($ibreak); - } - } - } - - my %is_logical_container; - - BEGIN { - @_ = qw# if elsif unless while and or err not && | || ? : ! #; - @is_logical_container{@_} = (1) x scalar(@_); - } - - sub set_for_semicolon_breakpoints { - my $dd = shift; - foreach ( @{ $rfor_semicolon_list[$dd] } ) { - set_forced_breakpoint($_); - } - } - - sub set_logical_breakpoints { - my $dd = shift; - if ( - $item_count_stack[$dd] == 0 - && $is_logical_container{ $container_type[$dd] } - - # TESTING: - || $has_old_logical_breakpoints[$dd] - ) - { - - # Look for breaks in this order: - # 0 1 2 3 - # or and || && - foreach my $i ( 0 .. 3 ) { - if ( $rand_or_list[$dd][$i] ) { - foreach ( @{ $rand_or_list[$dd][$i] } ) { - set_forced_breakpoint($_); - } - - # break at any 'if' and 'unless' too - foreach ( @{ $rand_or_list[$dd][4] } ) { - set_forced_breakpoint($_); - } - $rand_or_list[$dd] = []; - last; - } - } - } - } - - sub is_unbreakable_container { - - # never break a container of one of these types - # because bad things can happen (map1.t) - my $dd = shift; - $is_sort_map_grep{ $container_type[$dd] }; - } - - sub scan_list { - - # This routine is responsible for setting line breaks for all lists, - # so that hierarchical structure can be displayed and so that list - # items can be vertically aligned. The output of this routine is - # stored in the array @forced_breakpoint_to_go, which is used to set - # final breakpoints. - - $starting_depth = $nesting_depth_to_go[0]; - - $block_type = ' '; - $current_depth = $starting_depth; - $i = -1; - $last_colon_sequence_number = -1; - $last_nonblank_token = ';'; - $last_nonblank_type = ';'; - $last_nonblank_block_type = ' '; - $last_old_breakpoint_count = 0; - $minimum_depth = $current_depth + 1; # forces update in check below - $old_breakpoint_count = 0; - $starting_breakpoint_count = $forced_breakpoint_count; - $token = ';'; - $type = ';'; - $type_sequence = ''; - - check_for_new_minimum_depth($current_depth); - - my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; - my $want_previous_breakpoint = -1; - - my $saw_good_breakpoint; - my $i_line_end = -1; - my $i_line_start = -1; - - # loop over all tokens in this batch - while ( ++$i <= $max_index_to_go ) { - if ( $type ne 'b' ) { - $i_last_nonblank_token = $i - 1; - $last_nonblank_type = $type; - $last_nonblank_token = $token; - $last_nonblank_block_type = $block_type; - } - $type = $types_to_go[$i]; - $block_type = $block_type_to_go[$i]; - $token = $tokens_to_go[$i]; - $type_sequence = $type_sequence_to_go[$i]; - my $next_type = $types_to_go[ $i + 1 ]; - my $next_token = $tokens_to_go[ $i + 1 ]; - my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - - # set break if flag was set - if ( $want_previous_breakpoint >= 0 ) { - set_forced_breakpoint($want_previous_breakpoint); - $want_previous_breakpoint = -1; - } - - $last_old_breakpoint_count = $old_breakpoint_count; - if ( $old_breakpoint_to_go[$i] ) { - $i_line_end = $i; - $i_line_start = $i_next_nonblank; - - $old_breakpoint_count++; - - # Break before certain keywords if user broke there and - # this is a 'safe' break point. The idea is to retain - # any preferred breaks for sequential list operations, - # like a schwartzian transform. - if ($rOpts_break_at_old_keyword_breakpoints) { - if ( - $next_nonblank_type eq 'k' - && $is_keyword_returning_list{$next_nonblank_token} - && ( $type =~ /^[=\)\]\}Riw]$/ - || $type eq 'k' - && $is_keyword_returning_list{$token} ) - ) - { - - # we actually have to set this break next time through - # the loop because if we are at a closing token (such - # as '}') which forms a one-line block, this break might - # get undone. - $want_previous_breakpoint = $i; - } - } - } - next if ( $type eq 'b' ); - $depth = $nesting_depth_to_go[ $i + 1 ]; - - # safety check - be sure we always break after a comment - # Shouldn't happen .. an error here probably means that the - # nobreak flag did not get turned off correctly during - # formatting. - if ( $type eq '#' ) { - if ( $i != $max_index_to_go ) { - warning( -"Non-fatal program bug: backup logic needed to break after a comment\n" - ); - report_definite_bug(); - $nobreak_to_go[$i] = 0; - set_forced_breakpoint($i); - } - } - - # Force breakpoints at certain tokens in long lines. - # Note that such breakpoints will be undone later if these tokens - # are fully contained within parens on a line. - if ( - - # break before a keyword within a line - $type eq 'k' - && $i > 0 - - # if one of these keywords: - && $token =~ /^(if|unless|while|until|for)$/ - - # but do not break at something like '1 while' - && ( $last_nonblank_type ne 'n' || $i > 2 ) - - # and let keywords follow a closing 'do' brace - && $last_nonblank_block_type ne 'do' - - && ( - $is_long_line - - # or container is broken (by side-comment, etc) - || ( $next_nonblank_token eq '(' - && $mate_index_to_go[$i_next_nonblank] < $i ) - ) - ) - { - set_forced_breakpoint( $i - 1 ); - } - - # remember locations of '||' and '&&' for possible breaks if we - # decide this is a long logical expression. - if ( $type eq '||' ) { - push @{ $rand_or_list[$depth][2] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } - elsif ( $type eq '&&' ) { - push @{ $rand_or_list[$depth][3] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } - elsif ( $type eq 'f' ) { - push @{ $rfor_semicolon_list[$depth] }, $i; - } - elsif ( $type eq 'k' ) { - if ( $token eq 'and' ) { - push @{ $rand_or_list[$depth][1] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } - - # break immediately at 'or's which are probably not in a logical - # block -- but we will break in logical breaks below so that - # they do not add to the forced_breakpoint_count - elsif ( $token eq 'or' ) { - push @{ $rand_or_list[$depth][0] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - if ( $is_logical_container{ $container_type[$depth] } ) { - } - else { - if ($is_long_line) { set_forced_breakpoint($i) } - elsif ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) - { - $saw_good_breakpoint = 1; - } - } - } - elsif ( $token eq 'if' || $token eq 'unless' ) { - push @{ $rand_or_list[$depth][4] }, $i; - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) - { - set_forced_breakpoint($i); - } - } - } - elsif ( $is_assignment{$type} ) { - $i_equals[$depth] = $i; - } - - if ($type_sequence) { - - # handle any postponed closing breakpoints - if ( $token =~ /^[\)\]\}\:]$/ ) { - if ( $type eq ':' ) { - $last_colon_sequence_number = $type_sequence; - - # TESTING: retain break at a ':' line break - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_ternary_breakpoints ) - { - - # TESTING: - set_forced_breakpoint($i); - - # break at previous '=' - if ( $i_equals[$depth] > 0 ) { - set_forced_breakpoint( $i_equals[$depth] ); - $i_equals[$depth] = -1; - } - } - } - if ( defined( $postponed_breakpoint{$type_sequence} ) ) { - my $inc = ( $type eq ':' ) ? 0 : 1; - set_forced_breakpoint( $i - $inc ); - delete $postponed_breakpoint{$type_sequence}; - } - } - - # set breaks at ?/: if they will get separated (and are - # not a ?/: chain), or if the '?' is at the end of the - # line - elsif ( $token eq '?' ) { - my $i_colon = $mate_index_to_go[$i]; - if ( - $i_colon <= 0 # the ':' is not in this batch - || $i == 0 # this '?' is the first token of the line - || $i == - $max_index_to_go # or this '?' is the last token - ) - { - - # don't break at a '?' if preceded by ':' on - # this line of previous ?/: pair on this line. - # This is an attempt to preserve a chain of ?/: - # expressions (elsif2.t). And don't break if - # this has a side comment. - set_forced_breakpoint($i) - unless ( - $type_sequence == ( - $last_colon_sequence_number + - TYPE_SEQUENCE_INCREMENT - ) - || $tokens_to_go[$max_index_to_go] eq '#' - ); - set_closing_breakpoint($i); - } - } - } - -#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; - - #------------------------------------------------------------ - # Handle Increasing Depth.. - # - # prepare for a new list when depth increases - # token $i is a '(','{', or '[' - #------------------------------------------------------------ - if ( $depth > $current_depth ) { - - $breakpoint_stack[$depth] = $forced_breakpoint_count; - $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; - $has_broken_sublist[$depth] = 0; - $identifier_count_stack[$depth] = 0; - $index_before_arrow[$depth] = -1; - $interrupted_list[$depth] = 0; - $item_count_stack[$depth] = 0; - $last_comma_index[$depth] = undef; - $last_dot_index[$depth] = undef; - $last_nonblank_type[$depth] = $last_nonblank_type; - $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; - $opening_structure_index_stack[$depth] = $i; - $rand_or_list[$depth] = []; - $rfor_semicolon_list[$depth] = []; - $i_equals[$depth] = -1; - $want_comma_break[$depth] = 0; - $container_type[$depth] = - ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) - ? $last_nonblank_token - : ""; - $has_old_logical_breakpoints[$depth] = 0; - - # if line ends here then signal closing token to break - if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) - { - set_closing_breakpoint($i); - } - - # Not all lists of values should be vertically aligned.. - $dont_align[$depth] = - - # code BLOCKS are handled at a higher level - ( $block_type ne "" ) - - # certain paren lists - || ( $type eq '(' ) && ( - - # it does not usually look good to align a list of - # identifiers in a parameter list, as in: - # my($var1, $var2, ...) - # (This test should probably be refined, for now I'm just - # testing for any keyword) - ( $last_nonblank_type eq 'k' ) - - # a trailing '(' usually indicates a non-list - || ( $next_nonblank_type eq '(' ) - ); - - # patch to outdent opening brace of long if/for/.. - # statements (like this one). See similar coding in - # set_continuation breaks. We have also catch it here for - # short line fragments which otherwise will not go through - # set_continuation_breaks. - if ( - $block_type - - # if we have the ')' but not its '(' in this batch.. - && ( $last_nonblank_token eq ')' ) - && $mate_index_to_go[$i_last_nonblank_token] < 0 - - # and user wants brace to left - && !$rOpts->{'opening-brace-always-on-right'} - - && ( $type eq '{' ) # should be true - && ( $token eq '{' ) # should be true - ) - { - set_forced_breakpoint( $i - 1 ); - } - } - - #------------------------------------------------------------ - # Handle Decreasing Depth.. - # - # finish off any old list when depth decreases - # token $i is a ')','}', or ']' - #------------------------------------------------------------ - elsif ( $depth < $current_depth ) { - - check_for_new_minimum_depth($depth); - - # force all outer logical containers to break after we see on - # old breakpoint - $has_old_logical_breakpoints[$depth] ||= - $has_old_logical_breakpoints[$current_depth]; - - # Patch to break between ') {' if the paren list is broken. - # There is similar logic in set_continuation_breaks for - # non-broken lists. - if ( $token eq ')' - && $next_nonblank_block_type - && $interrupted_list[$current_depth] - && $next_nonblank_type eq '{' - && !$rOpts->{'opening-brace-always-on-right'} ) - { - set_forced_breakpoint($i); - } - -#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; - - # set breaks at commas if necessary - my ( $bp_count, $do_not_break_apart ) = - set_comma_breakpoints($current_depth); - - my $i_opening = $opening_structure_index_stack[$current_depth]; - my $saw_opening_structure = ( $i_opening >= 0 ); - - # this term is long if we had to break at interior commas.. - my $is_long_term = $bp_count > 0; - - # ..or if the length between opening and closing parens exceeds - # allowed line length - if ( !$is_long_term && $saw_opening_structure ) { - my $i_opening_minus = find_token_starting_list($i_opening); - - # Note: we have to allow for one extra space after a - # closing token so that we do not strand a comma or - # semicolon, hence the '>=' here (oneline.t) - $is_long_term = - excess_line_length( $i_opening_minus, $i ) >= 0; - } - - # We've set breaks after all comma-arrows. Now we have to - # undo them if this can be a one-line block - # (the only breakpoints set will be due to comma-arrows) - if ( - - # user doesn't require breaking after all comma-arrows - ( $rOpts_comma_arrow_breakpoints != 0 ) - - # and if the opening structure is in this batch - && $saw_opening_structure - - # and either on the same old line - && ( - $old_breakpoint_count_stack[$current_depth] == - $last_old_breakpoint_count - - # or user wants to form long blocks with arrows - || $rOpts_comma_arrow_breakpoints == 2 - ) - - # and we made some breakpoints between the opening and closing - && ( $breakpoint_undo_stack[$current_depth] < - $forced_breakpoint_undo_count ) - - # and this block is short enough to fit on one line - # Note: use < because need 1 more space for possible comma - && !$is_long_term - - ) - { - undo_forced_breakpoint_stack( - $breakpoint_undo_stack[$current_depth] ); - } - - # now see if we have any comma breakpoints left - my $has_comma_breakpoints = - ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ); - - # update broken-sublist flag of the outer container - $has_broken_sublist[$depth] = - $has_broken_sublist[$depth] - || $has_broken_sublist[$current_depth] - || $is_long_term - || $has_comma_breakpoints; - -# Having come to the closing ')', '}', or ']', now we have to decide if we -# should 'open up' the structure by placing breaks at the opening and -# closing containers. This is a tricky decision. Here are some of the -# basic considerations: -# -# -If this is a BLOCK container, then any breakpoints will have already -# been set (and according to user preferences), so we need do nothing here. -# -# -If we have a comma-separated list for which we can align the list items, -# then we need to do so because otherwise the vertical aligner cannot -# currently do the alignment. -# -# -If this container does itself contain a container which has been broken -# open, then it should be broken open to properly show the structure. -# -# -If there is nothing to align, and no other reason to break apart, -# then do not do it. -# -# We will not break open the parens of a long but 'simple' logical expression. -# For example: -# -# This is an example of a simple logical expression and its formatting: -# -# if ( $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 ) -# -# Most people would prefer this than the 'spacey' version: -# -# if ( -# $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 -# ) -# -# To illustrate the rules for breaking logical expressions, consider: -# -# FULLY DENSE: -# if ( $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc )) -# -# This is on the verge of being difficult to read. The current default is to -# open it up like this: -# -# DEFAULT: -# if ( -# $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc ) -# ) -# -# This is a compromise which tries to avoid being too dense and to spacey. -# A more spaced version would be: -# -# SPACEY: -# if ( -# $opt_excl -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# ) -# -# Some people might prefer the spacey version -- an option could be added. The -# innermost expression contains a long block '( exists $ids_... ')'. -# -# Here is how the logic goes: We will force a break at the 'or' that the -# innermost expression contains, but we will not break apart its opening and -# closing containers because (1) it contains no multi-line sub-containers itself, -# and (2) there is no alignment to be gained by breaking it open like this -# -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# -# (although this looks perfectly ok and might be good for long expressions). The -# outer 'if' container, though, contains a broken sub-container, so it will be -# broken open to avoid too much density. Also, since it contains no 'or's, there -# will be a forced break at its 'and'. - - # set some flags telling something about this container.. - my $is_simple_logical_expression = 0; - if ( $item_count_stack[$current_depth] == 0 - && $saw_opening_structure - && $tokens_to_go[$i_opening] eq '(' - && $is_logical_container{ $container_type[$current_depth] } - ) - { - - # This seems to be a simple logical expression with - # no existing breakpoints. Set a flag to prevent - # opening it up. - if ( !$has_comma_breakpoints ) { - $is_simple_logical_expression = 1; - } - - # This seems to be a simple logical expression with - # breakpoints (broken sublists, for example). Break - # at all 'or's and '||'s. - else { - set_logical_breakpoints($current_depth); - } - } - - if ( $is_long_term - && @{ $rfor_semicolon_list[$current_depth] } ) - { - set_for_semicolon_breakpoints($current_depth); - - # open up a long 'for' or 'foreach' container to allow - # leading term alignment unless -lp is used. - $has_comma_breakpoints = 1 - unless $rOpts_line_up_parentheses; - } - - if ( - - # breaks for code BLOCKS are handled at a higher level - !$block_type - - # we do not need to break at the top level of an 'if' - # type expression - && !$is_simple_logical_expression - - ## modification to keep ': (' containers vertically tight; - ## but probably better to let user set -vt=1 to avoid - ## inconsistency with other paren types - ## && ($container_type[$current_depth] ne ':') - - # otherwise, we require one of these reasons for breaking: - && ( - - # - this term has forced line breaks - $has_comma_breakpoints - - # - the opening container is separated from this batch - # for some reason (comment, blank line, code block) - # - this is a non-paren container spanning multiple lines - || !$saw_opening_structure - - # - this is a long block contained in another breakable - # container - || ( $is_long_term - && $container_environment_to_go[$i_opening] ne - 'BLOCK' ) - ) - ) - { - - # For -lp option, we must put a breakpoint before - # the token which has been identified as starting - # this indentation level. This is necessary for - # proper alignment. - if ( $rOpts_line_up_parentheses && $saw_opening_structure ) - { - my $item = $leading_spaces_to_go[ $i_opening + 1 ]; - if ( $i_opening + 1 < $max_index_to_go - && $types_to_go[ $i_opening + 1 ] eq 'b' ) - { - $item = $leading_spaces_to_go[ $i_opening + 2 ]; - } - if ( defined($item) ) { - my $i_start_2 = $item->get_STARTING_INDEX(); - if ( - defined($i_start_2) - - # we are breaking after an opening brace, paren, - # so don't break before it too - && $i_start_2 ne $i_opening - ) - { - - # Only break for breakpoints at the same - # indentation level as the opening paren - my $test1 = $nesting_depth_to_go[$i_opening]; - my $test2 = $nesting_depth_to_go[$i_start_2]; - if ( $test2 == $test1 ) { - set_forced_breakpoint( $i_start_2 - 1 ); - } - } - } - } - - # break after opening structure. - # note: break before closing structure will be automatic - if ( $minimum_depth <= $current_depth ) { - - set_forced_breakpoint($i_opening) - unless ( $do_not_break_apart - || is_unbreakable_container($current_depth) ); - - # break at '.' of lower depth level before opening token - if ( $last_dot_index[$depth] ) { - set_forced_breakpoint( $last_dot_index[$depth] ); - } - - # break before opening structure if preeced by another - # closing structure and a comma. This is normally - # done by the previous closing brace, but not - # if it was a one-line block. - if ( $i_opening > 2 ) { - my $i_prev = - ( $types_to_go[ $i_opening - 1 ] eq 'b' ) - ? $i_opening - 2 - : $i_opening - 1; - - if ( $types_to_go[$i_prev] eq ',' - && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) - { - set_forced_breakpoint($i_prev); - } - - # also break before something like ':(' or '?(' - # if appropriate. - elsif ( - $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) - { - my $token_prev = $tokens_to_go[$i_prev]; - if ( $want_break_before{$token_prev} ) { - set_forced_breakpoint($i_prev); - } - } - } - } - - # break after comma following closing structure - if ( $next_type eq ',' ) { - set_forced_breakpoint( $i + 1 ); - } - - # break before an '=' following closing structure - if ( - $is_assignment{$next_nonblank_type} - && ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ) - ) - { - set_forced_breakpoint($i); - } - - # break at any comma before the opening structure Added - # for -lp, but seems to be good in general. It isn't - # obvious how far back to look; the '5' below seems to - # work well and will catch the comma in something like - # push @list, myfunc( $param, $param, .. - - my $icomma = $last_comma_index[$depth]; - if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { - unless ( $forced_breakpoint_to_go[$icomma] ) { - set_forced_breakpoint($icomma); - } - } - } # end logic to open up a container - - # Break open a logical container open if it was already open - elsif ($is_simple_logical_expression - && $has_old_logical_breakpoints[$current_depth] ) - { - set_logical_breakpoints($current_depth); - } - - # Handle long container which does not get opened up - elsif ($is_long_term) { - - # must set fake breakpoint to alert outer containers that - # they are complex - set_fake_breakpoint(); - } - } - - #------------------------------------------------------------ - # Handle this token - #------------------------------------------------------------ - - $current_depth = $depth; - - # handle comma-arrow - if ( $type eq '=>' ) { - next if ( $last_nonblank_type eq '=>' ); - next if $rOpts_break_at_old_comma_breakpoints; - next if $rOpts_comma_arrow_breakpoints == 3; - $want_comma_break[$depth] = 1; - $index_before_arrow[$depth] = $i_last_nonblank_token; - next; - } - - elsif ( $type eq '.' ) { - $last_dot_index[$depth] = $i; - } - - # Turn off alignment if we are sure that this is not a list - # environment. To be safe, we will do this if we see certain - # non-list tokens, such as ';', and also the environment is - # not a list. Note that '=' could be in any of the = operators - # (lextest.t). We can't just use the reported environment - # because it can be incorrect in some cases. - elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) - && $container_environment_to_go[$i] ne 'LIST' ) - { - $dont_align[$depth] = 1; - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - } - - # now just handle any commas - next unless ( $type eq ',' ); - - $last_dot_index[$depth] = undef; - $last_comma_index[$depth] = $i; - - # break here if this comma follows a '=>' - # but not if there is a side comment after the comma - if ( $want_comma_break[$depth] ) { - - if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - next; - } - - set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); - - # break before the previous token if it looks safe - # Example of something that we will not try to break before: - # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, - # Also we don't want to break at a binary operator (like +): - # $c->createOval( - # $x + $R, $y + - # $R => $x - $R, - # $y - $R, -fill => 'black', - # ); - my $ibreak = $index_before_arrow[$depth] - 1; - if ( $ibreak > 0 - && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) - { - if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } - if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } - if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { - - # don't break pointer calls, such as the following: - # File::Spec->curdir => 1, - # (This is tokenized as adjacent 'w' tokens) - if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { - set_forced_breakpoint($ibreak); - } - } - } - - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - - # handle list which mixes '=>'s and ','s: - # treat any list items so far as an interrupted list - $interrupted_list[$depth] = 1; - next; - } - - # break after all commas above starting depth - if ( $depth < $starting_depth && !$dont_align[$depth] ) { - set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); - next; - } - - # add this comma to the list.. - my $item_count = $item_count_stack[$depth]; - if ( $item_count == 0 ) { - - # but do not form a list with no opening structure - # for example: - - # open INFILE_COPY, ">$input_file_copy" - # or die ("very long message"); - - if ( ( $opening_structure_index_stack[$depth] < 0 ) - && $container_environment_to_go[$i] eq 'BLOCK' ) - { - $dont_align[$depth] = 1; - } - } - - $comma_index[$depth][$item_count] = $i; - ++$item_count_stack[$depth]; - if ( $last_nonblank_type =~ /^[iR\]]$/ ) { - $identifier_count_stack[$depth]++; - } - } - - #------------------------------------------- - # end of loop over all tokens in this batch - #------------------------------------------- - - # set breaks for any unfinished lists .. - for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { - - $interrupted_list[$dd] = 1; - $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); - set_comma_breakpoints($dd); - set_logical_breakpoints($dd) - if ( $has_old_logical_breakpoints[$dd] ); - set_for_semicolon_breakpoints($dd); - - # break open container... - my $i_opening = $opening_structure_index_stack[$dd]; - set_forced_breakpoint($i_opening) - unless ( - is_unbreakable_container($dd) - - # Avoid a break which would place an isolated ' or " - # on a line - || ( $type eq 'Q' - && $i_opening >= $max_index_to_go - 2 - && $token =~ /^['"]$/ ) - ); - } - - # Return a flag indicating if the input file had some good breakpoints. - # This flag will be used to force a break in a line shorter than the - # allowed line length. - if ( $has_old_logical_breakpoints[$current_depth] ) { - $saw_good_breakpoint = 1; - } - return $saw_good_breakpoint; - } -} # end scan_list - -sub find_token_starting_list { - - # When testing to see if a block will fit on one line, some - # previous token(s) may also need to be on the line; particularly - # if this is a sub call. So we will look back at least one - # token. NOTE: This isn't perfect, but not critical, because - # if we mis-identify a block, it will be wrapped and therefore - # fixed the next time it is formatted. - my $i_opening_paren = shift; - my $i_opening_minus = $i_opening_paren; - my $im1 = $i_opening_paren - 1; - my $im2 = $i_opening_paren - 2; - my $im3 = $i_opening_paren - 3; - my $typem1 = $types_to_go[$im1]; - my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; - if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { - $i_opening_minus = $i_opening_paren; - } - elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { - $i_opening_minus = $im1 if $im1 >= 0; - - # walk back to improve length estimate - for ( my $j = $im1 ; $j >= 0 ; $j-- ) { - last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); - $i_opening_minus = $j; - } - if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } - } - elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } - elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { - $i_opening_minus = $im2; - } - return $i_opening_minus; -} - -{ # begin set_comma_breakpoints_do - - my %is_keyword_with_special_leading_term; - - BEGIN { - - # These keywords have prototypes which allow a special leading item - # followed by a list - @_ = - qw(formline grep kill map printf sprintf push chmod join pack unshift); - @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); - } - - sub set_comma_breakpoints_do { - - # Given a list with some commas, set breakpoints at some of the - # commas, if necessary, to make it easy to read. This list is - # an example: - my ( - $depth, $i_opening_paren, $i_closing_paren, - $item_count, $identifier_count, $rcomma_index, - $next_nonblank_type, $list_type, $interrupted, - $rdo_not_break_apart, $must_break_open, - ) = @_; - - # nothing to do if no commas seen - return if ( $item_count < 1 ); - my $i_first_comma = $$rcomma_index[0]; - my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; - my $i_last_comma = $i_true_last_comma; - if ( $i_last_comma >= $max_index_to_go ) { - $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; - return if ( $item_count < 1 ); - } - - #--------------------------------------------------------------- - # find lengths of all items in the list to calculate page layout - #--------------------------------------------------------------- - my $comma_count = $item_count; - my @item_lengths; - my @i_term_begin; - my @i_term_end; - my @i_term_comma; - my $i_prev_plus; - my @max_length = ( 0, 0 ); - my $first_term_length; - my $i = $i_opening_paren; - my $is_odd = 1; - - for ( my $j = 0 ; $j < $comma_count ; $j++ ) { - $is_odd = 1 - $is_odd; - $i_prev_plus = $i + 1; - $i = $$rcomma_index[$j]; - - my $i_term_end = - ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; - my $i_term_begin = - ( $types_to_go[$i_prev_plus] eq 'b' ) - ? $i_prev_plus + 1 - : $i_prev_plus; - push @i_term_begin, $i_term_begin; - push @i_term_end, $i_term_end; - push @i_term_comma, $i; - - # note: currently adding 2 to all lengths (for comma and space) - my $length = - 2 + token_sequence_length( $i_term_begin, $i_term_end ); - push @item_lengths, $length; - - if ( $j == 0 ) { - $first_term_length = $length; - } - else { - - if ( $length > $max_length[$is_odd] ) { - $max_length[$is_odd] = $length; - } - } - } - - # now we have to make a distinction between the comma count and item - # count, because the item count will be one greater than the comma - # count if the last item is not terminated with a comma - my $i_b = - ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) - ? $i_last_comma + 1 - : $i_last_comma; - my $i_e = - ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) - ? $i_closing_paren - 2 - : $i_closing_paren - 1; - my $i_effective_last_comma = $i_last_comma; - - my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); - - if ( $last_item_length > 0 ) { - - # add 2 to length because other lengths include a comma and a blank - $last_item_length += 2; - push @item_lengths, $last_item_length; - push @i_term_begin, $i_b + 1; - push @i_term_end, $i_e; - push @i_term_comma, undef; - - my $i_odd = $item_count % 2; - - if ( $last_item_length > $max_length[$i_odd] ) { - $max_length[$i_odd] = $last_item_length; - } - - $item_count++; - $i_effective_last_comma = $i_e + 1; - - if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { - $identifier_count++; - } - } - - #--------------------------------------------------------------- - # End of length calculations - #--------------------------------------------------------------- - - #--------------------------------------------------------------- - # Compound List Rule 1: - # Break at (almost) every comma for a list containing a broken - # sublist. This has higher priority than the Interrupted List - # Rule. - #--------------------------------------------------------------- - if ( $has_broken_sublist[$depth] ) { - - # Break at every comma except for a comma between two - # simple, small terms. This prevents long vertical - # columns of, say, just 0's. - my $small_length = 10; # 2 + actual maximum length wanted - - # We'll insert a break in long runs of small terms to - # allow alignment in uniform tables. - my $skipped_count = 0; - my $columns = table_columns_available($i_first_comma); - my $fields = int( $columns / $small_length ); - if ( $rOpts_maximum_fields_per_table - && $fields > $rOpts_maximum_fields_per_table ) - { - $fields = $rOpts_maximum_fields_per_table; - } - my $max_skipped_count = $fields - 1; - - my $is_simple_last_term = 0; - my $is_simple_next_term = 0; - foreach my $j ( 0 .. $item_count ) { - $is_simple_last_term = $is_simple_next_term; - $is_simple_next_term = 0; - if ( $j < $item_count - && $i_term_end[$j] == $i_term_begin[$j] - && $item_lengths[$j] <= $small_length ) - { - $is_simple_next_term = 1; - } - next if $j == 0; - if ( $is_simple_last_term - && $is_simple_next_term - && $skipped_count < $max_skipped_count ) - { - $skipped_count++; - } - else { - $skipped_count = 0; - my $i = $i_term_comma[ $j - 1 ]; - last unless defined $i; - set_forced_breakpoint($i); - } - } - - # always break at the last comma if this list is - # interrupted; we wouldn't want to leave a terminal '{', for - # example. - if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } - return; - } - -#my ( $a, $b, $c ) = caller(); -#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count -#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; -#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; - - #--------------------------------------------------------------- - # Interrupted List Rule: - # A list is is forced to use old breakpoints if it was interrupted - # by side comments or blank lines, or requested by user. - #--------------------------------------------------------------- - if ( $rOpts_break_at_old_comma_breakpoints - || $interrupted - || $i_opening_paren < 0 ) - { - copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); - return; - } - - #--------------------------------------------------------------- - # Looks like a list of items. We have to look at it and size it up. - #--------------------------------------------------------------- - - my $opening_token = $tokens_to_go[$i_opening_paren]; - my $opening_environment = - $container_environment_to_go[$i_opening_paren]; - - #------------------------------------------------------------------- - # Return if this will fit on one line - #------------------------------------------------------------------- - - my $i_opening_minus = find_token_starting_list($i_opening_paren); - return - unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; - - #------------------------------------------------------------------- - # Now we know that this block spans multiple lines; we have to set - # at least one breakpoint -- real or fake -- as a signal to break - # open any outer containers. - #------------------------------------------------------------------- - set_fake_breakpoint(); - - # be sure we do not extend beyond the current list length - if ( $i_effective_last_comma >= $max_index_to_go ) { - $i_effective_last_comma = $max_index_to_go - 1; - } - - # Set a flag indicating if we need to break open to keep -lp - # items aligned. This is necessary if any of the list terms - # exceeds the available space after the '('. - my $need_lp_break_open = $must_break_open; - if ( $rOpts_line_up_parentheses && !$must_break_open ) { - my $columns_if_unbroken = $rOpts_maximum_line_length - - total_line_length( $i_opening_minus, $i_opening_paren ); - $need_lp_break_open = - ( $max_length[0] > $columns_if_unbroken ) - || ( $max_length[1] > $columns_if_unbroken ) - || ( $first_term_length > $columns_if_unbroken ); - } - - # Specify if the list must have an even number of fields or not. - # It is generally safest to assume an even number, because the - # list items might be a hash list. But if we can be sure that - # it is not a hash, then we can allow an odd number for more - # flexibility. - my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count - - if ( $identifier_count >= $item_count - 1 - || $is_assignment{$next_nonblank_type} - || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) - ) - { - $odd_or_even = 1; - } - - # do we have a long first term which should be - # left on a line by itself? - my $use_separate_first_term = ( - $odd_or_even == 1 # only if we can use 1 field/line - && $item_count > 3 # need several items - && $first_term_length > - 2 * $max_length[0] - 2 # need long first term - && $first_term_length > - 2 * $max_length[1] - 2 # need long first term - ); - - # or do we know from the type of list that the first term should - # be placed alone? - if ( !$use_separate_first_term ) { - if ( $is_keyword_with_special_leading_term{$list_type} ) { - $use_separate_first_term = 1; - - # should the container be broken open? - if ( $item_count < 3 ) { - if ( $i_first_comma - $i_opening_paren < 4 ) { - $$rdo_not_break_apart = 1; - } - } - elsif ($first_term_length < 20 - && $i_first_comma - $i_opening_paren < 4 ) - { - my $columns = table_columns_available($i_first_comma); - if ( $first_term_length < $columns ) { - $$rdo_not_break_apart = 1; - } - } - } - } - - # if so, - if ($use_separate_first_term) { - - # ..set a break and update starting values - $use_separate_first_term = 1; - set_forced_breakpoint($i_first_comma); - $i_opening_paren = $i_first_comma; - $i_first_comma = $$rcomma_index[1]; - $item_count--; - return if $comma_count == 1; - shift @item_lengths; - shift @i_term_begin; - shift @i_term_end; - shift @i_term_comma; - } - - # if not, update the metrics to include the first term - else { - if ( $first_term_length > $max_length[0] ) { - $max_length[0] = $first_term_length; - } - } - - # Field width parameters - my $pair_width = ( $max_length[0] + $max_length[1] ); - my $max_width = - ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; - - # Number of free columns across the page width for laying out tables - my $columns = table_columns_available($i_first_comma); - - # Estimated maximum number of fields which fit this space - # This will be our first guess - my $number_of_fields_max = - maximum_number_of_fields( $columns, $odd_or_even, $max_width, - $pair_width ); - my $number_of_fields = $number_of_fields_max; - - # Find the best-looking number of fields - # and make this our second guess if possible - my ( $number_of_fields_best, $ri_ragged_break_list, - $new_identifier_count ) - = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, - $max_width ); - - if ( $number_of_fields_best != 0 - && $number_of_fields_best < $number_of_fields_max ) - { - $number_of_fields = $number_of_fields_best; - } - - # ---------------------------------------------------------------------- - # If we are crowded and the -lp option is being used, try to - # undo some indentation - # ---------------------------------------------------------------------- - if ( - $rOpts_line_up_parentheses - && ( - $number_of_fields == 0 - || ( $number_of_fields == 1 - && $number_of_fields != $number_of_fields_best ) - ) - ) - { - my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); - if ( $available_spaces > 0 ) { - - my $spaces_wanted = $max_width - $columns; # for 1 field - - if ( $number_of_fields_best == 0 ) { - $number_of_fields_best = - get_maximum_fields_wanted( \@item_lengths ); - } - - if ( $number_of_fields_best != 1 ) { - my $spaces_wanted_2 = - 1 + $pair_width - $columns; # for 2 fields - if ( $available_spaces > $spaces_wanted_2 ) { - $spaces_wanted = $spaces_wanted_2; - } - } - - if ( $spaces_wanted > 0 ) { - my $deleted_spaces = - reduce_lp_indentation( $i_first_comma, $spaces_wanted ); - - # redo the math - if ( $deleted_spaces > 0 ) { - $columns = table_columns_available($i_first_comma); - $number_of_fields_max = - maximum_number_of_fields( $columns, $odd_or_even, - $max_width, $pair_width ); - $number_of_fields = $number_of_fields_max; - - if ( $number_of_fields_best == 1 - && $number_of_fields >= 1 ) - { - $number_of_fields = $number_of_fields_best; - } - } - } - } - } - - # try for one column if two won't work - if ( $number_of_fields <= 0 ) { - $number_of_fields = int( $columns / $max_width ); - } - - # The user can place an upper bound on the number of fields, - # which can be useful for doing maintenance on tables - if ( $rOpts_maximum_fields_per_table - && $number_of_fields > $rOpts_maximum_fields_per_table ) - { - $number_of_fields = $rOpts_maximum_fields_per_table; - } - - # How many columns (characters) and lines would this container take - # if no additional whitespace were added? - my $packed_columns = token_sequence_length( $i_opening_paren + 1, - $i_effective_last_comma + 1 ); - if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero - my $packed_lines = 1 + int( $packed_columns / $columns ); - - # are we an item contained in an outer list? - my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; - - if ( $number_of_fields <= 0 ) { - -# #--------------------------------------------------------------- -# # We're in trouble. We can't find a single field width that works. -# # There is no simple answer here; we may have a single long list -# # item, or many. -# #--------------------------------------------------------------- -# -# In many cases, it may be best to not force a break if there is just one -# comma, because the standard continuation break logic will do a better -# job without it. -# -# In the common case that all but one of the terms can fit -# on a single line, it may look better not to break open the -# containing parens. Consider, for example -# -# $color = -# join ( '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } -# keys %colors ); -# -# which will look like this with the container broken: -# -# $color = join ( -# '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors -# ); -# -# Here is an example of this rule for a long last term: -# -# log_message( 0, 256, 128, -# "Number of routes in adj-RIB-in to be considered: $peercount" ); -# -# And here is an example with a long first term: -# -# $s = sprintf( -# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", -# $r, $pu, $ps, $cu, $cs, $tt -# ) -# if $style eq 'all'; - - my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; - my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; - my $long_first_term = - excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; - - # break at every comma ... - if ( - - # if requested by user or is best looking - $number_of_fields_best == 1 - - # or if this is a sublist of a larger list - || $in_hierarchical_list - - # or if multiple commas and we dont have a long first or last - # term - || ( $comma_count > 1 - && !( $long_last_term || $long_first_term ) ) - ) - { - foreach ( 0 .. $comma_count - 1 ) { - set_forced_breakpoint( $$rcomma_index[$_] ); - } - } - elsif ($long_last_term) { - - set_forced_breakpoint($i_last_comma); - $$rdo_not_break_apart = 1 unless $must_break_open; - } - elsif ($long_first_term) { - - set_forced_breakpoint($i_first_comma); - } - else { - - # let breaks be defined by default bond strength logic - } - return; - } - - # -------------------------------------------------------- - # We have a tentative field count that seems to work. - # How many lines will this require? - # -------------------------------------------------------- - my $formatted_lines = $item_count / ($number_of_fields); - if ( $formatted_lines != int $formatted_lines ) { - $formatted_lines = 1 + int $formatted_lines; - } - - # So far we've been trying to fill out to the right margin. But - # compact tables are easier to read, so let's see if we can use fewer - # fields without increasing the number of lines. - $number_of_fields = - compactify_table( $item_count, $number_of_fields, $formatted_lines, - $odd_or_even ); - - # How many spaces across the page will we fill? - my $columns_per_line = - ( int $number_of_fields / 2 ) * $pair_width + - ( $number_of_fields % 2 ) * $max_width; - - my $formatted_columns; - - if ( $number_of_fields > 1 ) { - $formatted_columns = - ( $pair_width * ( int( $item_count / 2 ) ) + - ( $item_count % 2 ) * $max_width ); - } - else { - $formatted_columns = $max_width * $item_count; - } - if ( $formatted_columns < $packed_columns ) { - $formatted_columns = $packed_columns; - } - - my $unused_columns = $formatted_columns - $packed_columns; - - # set some empirical parameters to help decide if we should try to - # align; high sparsity does not look good, especially with few lines - my $sparsity = ($unused_columns) / ($formatted_columns); - my $max_allowed_sparsity = - ( $item_count < 3 ) ? 0.1 - : ( $packed_lines == 1 ) ? 0.15 - : ( $packed_lines == 2 ) ? 0.4 - : 0.7; - - # Begin check for shortcut methods, which avoid treating a list - # as a table for relatively small parenthesized lists. These - # are usually easier to read if not formatted as tables. - if ( - $packed_lines <= 2 # probably can fit in 2 lines - && $item_count < 9 # doesn't have too many items - && $opening_environment eq 'BLOCK' # not a sub-container - && $opening_token eq '(' # is paren list - ) - { - - # Shortcut method 1: for -lp and just one comma: - # This is a no-brainer, just break at the comma. - if ( - $rOpts_line_up_parentheses # -lp - && $item_count == 2 # two items, one comma - && !$must_break_open - ) - { - my $i_break = $$rcomma_index[0]; - set_forced_breakpoint($i_break); - $$rdo_not_break_apart = 1; - set_non_alignment_flags( $comma_count, $rcomma_index ); - return; - - } - - # method 2 is for most small ragged lists which might look - # best if not displayed as a table. - if ( - ( $number_of_fields == 2 && $item_count == 3 ) - || ( - $new_identifier_count > 0 # isn't all quotes - && $sparsity > 0.15 - ) # would be fairly spaced gaps if aligned - ) - { - - my $break_count = set_ragged_breakpoints( \@i_term_comma, - $ri_ragged_break_list ); - ++$break_count if ($use_separate_first_term); - - # NOTE: we should really use the true break count here, - # which can be greater if there are large terms and - # little space, but usually this will work well enough. - unless ($must_break_open) { - - if ( $break_count <= 1 ) { - $$rdo_not_break_apart = 1; - } - elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) - { - $$rdo_not_break_apart = 1; - } - } - set_non_alignment_flags( $comma_count, $rcomma_index ); - return; - } - - } # end shortcut methods - - # debug stuff - - FORMATTER_DEBUG_FLAG_SPARSE && do { - print -"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; - - }; - - #--------------------------------------------------------------- - # Compound List Rule 2: - # If this list is too long for one line, and it is an item of a - # larger list, then we must format it, regardless of sparsity - # (ian.t). One reason that we have to do this is to trigger - # Compound List Rule 1, above, which causes breaks at all commas of - # all outer lists. In this way, the structure will be properly - # displayed. - #--------------------------------------------------------------- - - # Decide if this list is too long for one line unless broken - my $total_columns = table_columns_available($i_opening_paren); - my $too_long = $packed_columns > $total_columns; - - # For a paren list, include the length of the token just before the - # '(' because this is likely a sub call, and we would have to - # include the sub name on the same line as the list. This is still - # imprecise, but not too bad. (steve.t) - if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { - - $too_long = excess_line_length( $i_opening_minus, - $i_effective_last_comma + 1 ) > 0; - } - - # FIXME: For an item after a '=>', try to include the length of the - # thing before the '=>'. This is crude and should be improved by - # actually looking back token by token. - if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { - my $i_opening_minus = $i_opening_paren - 4; - if ( $i_opening_minus >= 0 ) { - $too_long = excess_line_length( $i_opening_minus, - $i_effective_last_comma + 1 ) > 0; - } - } - - # Always break lists contained in '[' and '{' if too long for 1 line, - # and always break lists which are too long and part of a more complex - # structure. - my $must_break_open_container = $must_break_open - || ( $too_long - && ( $in_hierarchical_list || $opening_token ne '(' ) ); - -#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; - - #--------------------------------------------------------------- - # The main decision: - # Now decide if we will align the data into aligned columns. Do not - # attempt to align columns if this is a tiny table or it would be - # too spaced. It seems that the more packed lines we have, the - # sparser the list that can be allowed and still look ok. - #--------------------------------------------------------------- - - if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) - || ( $formatted_lines < 2 ) - || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) - ) - { - - #--------------------------------------------------------------- - # too sparse: would look ugly if aligned in a table; - #--------------------------------------------------------------- - - # use old breakpoints if this is a 'big' list - # FIXME: goal is to improve set_ragged_breakpoints so that - # this is not necessary. - if ( $packed_lines > 2 && $item_count > 10 ) { - write_logfile_entry("List sparse: using old breakpoints\n"); - copy_old_breakpoints( $i_first_comma, $i_last_comma ); - } - - # let the continuation logic handle it if 2 lines - else { - - my $break_count = set_ragged_breakpoints( \@i_term_comma, - $ri_ragged_break_list ); - ++$break_count if ($use_separate_first_term); - - unless ($must_break_open_container) { - if ( $break_count <= 1 ) { - $$rdo_not_break_apart = 1; - } - elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) - { - $$rdo_not_break_apart = 1; - } - } - set_non_alignment_flags( $comma_count, $rcomma_index ); - } - return; - } - - #--------------------------------------------------------------- - # go ahead and format as a table - #--------------------------------------------------------------- - write_logfile_entry( - "List: auto formatting with $number_of_fields fields/row\n"); - - my $j_first_break = - $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; - - for ( - my $j = $j_first_break ; - $j < $comma_count ; - $j += $number_of_fields - ) - { - my $i = $$rcomma_index[$j]; - set_forced_breakpoint($i); - } - return; - } -} - -sub set_non_alignment_flags { - - # set flag which indicates that these commas should not be - # aligned - my ( $comma_count, $rcomma_index ) = @_; - foreach ( 0 .. $comma_count - 1 ) { - $matching_token_to_go[ $$rcomma_index[$_] ] = 1; - } -} - -sub study_list_complexity { - - # Look for complex tables which should be formatted with one term per line. - # Returns the following: - # - # \@i_ragged_break_list = list of good breakpoints to avoid lines - # which are hard to read - # $number_of_fields_best = suggested number of fields based on - # complexity; = 0 if any number may be used. - # - my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; - my $item_count = @{$ri_term_begin}; - my $complex_item_count = 0; - my $number_of_fields_best = $rOpts_maximum_fields_per_table; - my $i_max = @{$ritem_lengths} - 1; - ##my @item_complexity; - - my $i_last_last_break = -3; - my $i_last_break = -2; - my @i_ragged_break_list; - - my $definitely_complex = 30; - my $definitely_simple = 12; - my $quote_count = 0; - - for my $i ( 0 .. $i_max ) { - my $ib = $ri_term_begin->[$i]; - my $ie = $ri_term_end->[$i]; - - # define complexity: start with the actual term length - my $weighted_length = ( $ritem_lengths->[$i] - 2 ); - - ##TBD: join types here and check for variations - ##my $str=join "", @tokens_to_go[$ib..$ie]; - - my $is_quote = 0; - if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { - $is_quote = 1; - $quote_count++; - } - elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { - $quote_count++; - } - - if ( $ib eq $ie ) { - if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { - $complex_item_count++; - $weighted_length *= 2; - } - else { - } - } - else { - if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { - $complex_item_count++; - $weighted_length *= 2; - } - if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { - $weighted_length += 4; - } - } - - # add weight for extra tokens. - $weighted_length += 2 * ( $ie - $ib ); - -## my $BUB = join '', @tokens_to_go[$ib..$ie]; -## print "# COMPLEXITY:$weighted_length $BUB\n"; - -##push @item_complexity, $weighted_length; - - # now mark a ragged break after this item it if it is 'long and - # complex': - if ( $weighted_length >= $definitely_complex ) { - - # if we broke after the previous term - # then break before it too - if ( $i_last_break == $i - 1 - && $i > 1 - && $i_last_last_break != $i - 2 ) - { - - ## FIXME: don't strand a small term - pop @i_ragged_break_list; - push @i_ragged_break_list, $i - 2; - push @i_ragged_break_list, $i - 1; - } - - push @i_ragged_break_list, $i; - $i_last_last_break = $i_last_break; - $i_last_break = $i; - } - - # don't break before a small last term -- it will - # not look good on a line by itself. - elsif ($i == $i_max - && $i_last_break == $i - 1 - && $weighted_length <= $definitely_simple ) - { - pop @i_ragged_break_list; - } - } - - my $identifier_count = $i_max + 1 - $quote_count; - - # Need more tuning here.. - if ( $max_width > 12 - && $complex_item_count > $item_count / 2 - && $number_of_fields_best != 2 ) - { - $number_of_fields_best = 1; - } - - return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); -} - -sub get_maximum_fields_wanted { - - # Not all tables look good with more than one field of items. - # This routine looks at a table and decides if it should be - # formatted with just one field or not. - # This coding is still under development. - my ($ritem_lengths) = @_; - - my $number_of_fields_best = 0; - - # For just a few items, we tentatively assume just 1 field. - my $item_count = @{$ritem_lengths}; - if ( $item_count <= 5 ) { - $number_of_fields_best = 1; - } - - # For larger tables, look at it both ways and see what looks best - else { - - my $is_odd = 1; - my @max_length = ( 0, 0 ); - my @last_length_2 = ( undef, undef ); - my @first_length_2 = ( undef, undef ); - my $last_length = undef; - my $total_variation_1 = 0; - my $total_variation_2 = 0; - my @total_variation_2 = ( 0, 0 ); - for ( my $j = 0 ; $j < $item_count ; $j++ ) { - - $is_odd = 1 - $is_odd; - my $length = $ritem_lengths->[$j]; - if ( $length > $max_length[$is_odd] ) { - $max_length[$is_odd] = $length; - } - - if ( defined($last_length) ) { - my $dl = abs( $length - $last_length ); - $total_variation_1 += $dl; - } - $last_length = $length; - - my $ll = $last_length_2[$is_odd]; - if ( defined($ll) ) { - my $dl = abs( $length - $ll ); - $total_variation_2[$is_odd] += $dl; - } - else { - $first_length_2[$is_odd] = $length; - } - $last_length_2[$is_odd] = $length; - } - $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; - - my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; - unless ( $total_variation_2 < $factor * $total_variation_1 ) { - $number_of_fields_best = 1; - } - } - return ($number_of_fields_best); -} - -sub table_columns_available { - my $i_first_comma = shift; - my $columns = - $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma); - - # Patch: the vertical formatter does not line up lines whose lengths - # exactly equal the available line length because of allowances - # that must be made for side comments. Therefore, the number of - # available columns is reduced by 1 character. - $columns -= 1; - return $columns; -} - -sub maximum_number_of_fields { - - # how many fields will fit in the available space? - my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; - my $max_pairs = int( $columns / $pair_width ); - my $number_of_fields = $max_pairs * 2; - if ( $odd_or_even == 1 - && $max_pairs * $pair_width + $max_width <= $columns ) - { - $number_of_fields++; - } - return $number_of_fields; -} - -sub compactify_table { - - # given a table with a certain number of fields and a certain number - # of lines, see if reducing the number of fields will make it look - # better. - my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; - if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { - my $min_fields; - - for ( - $min_fields = $number_of_fields ; - $min_fields >= $odd_or_even - && $min_fields * $formatted_lines >= $item_count ; - $min_fields -= $odd_or_even - ) - { - $number_of_fields = $min_fields; - } - } - return $number_of_fields; -} - -sub set_ragged_breakpoints { - - # Set breakpoints in a list that cannot be formatted nicely as a - # table. - my ( $ri_term_comma, $ri_ragged_break_list ) = @_; - - my $break_count = 0; - foreach (@$ri_ragged_break_list) { - my $j = $ri_term_comma->[$_]; - if ($j) { - set_forced_breakpoint($j); - $break_count++; - } - } - return $break_count; -} - -sub copy_old_breakpoints { - my ( $i_first_comma, $i_last_comma ) = @_; - for my $i ( $i_first_comma .. $i_last_comma ) { - if ( $old_breakpoint_to_go[$i] ) { - set_forced_breakpoint($i); - } - } -} - -sub set_nobreaks { - my ( $i, $j ) = @_; - if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { - - FORMATTER_DEBUG_FLAG_NOBREAK && do { - my ( $a, $b, $c ) = caller(); - print( -"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n" - ); - }; - - @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); - } - - # shouldn't happen; non-critical error - else { - FORMATTER_DEBUG_FLAG_NOBREAK && do { - my ( $a, $b, $c ) = caller(); - print( -"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n" - ); - }; - } -} - -sub set_fake_breakpoint { - - # Just bump up the breakpoint count as a signal that there are breaks. - # This is useful if we have breaks but may want to postpone deciding where - # to make them. - $forced_breakpoint_count++; -} - -sub set_forced_breakpoint { - my $i = shift; - - return unless defined $i && $i >= 0; - - # when called with certain tokens, use bond strengths to decide - # if we break before or after it - my $token = $tokens_to_go[$i]; - - if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { - if ( $want_break_before{$token} && $i >= 0 ) { $i-- } - } - - # breaks are forced before 'if' and 'unless' - elsif ( $is_if_unless{$token} ) { $i-- } - - if ( $i >= 0 && $i <= $max_index_to_go ) { - my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; - - FORMATTER_DEBUG_FLAG_FORCE && do { - my ( $a, $b, $c ) = caller(); - print -"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; - }; - - if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { - $forced_breakpoint_to_go[$i_nonblank] = 1; - - if ( $i_nonblank > $index_max_forced_break ) { - $index_max_forced_break = $i_nonblank; - } - $forced_breakpoint_count++; - $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = - $i_nonblank; - - # if we break at an opening container..break at the closing - if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { - set_closing_breakpoint($i_nonblank); - } - } - } -} - -sub clear_breakpoint_undo_stack { - $forced_breakpoint_undo_count = 0; -} - -sub undo_forced_breakpoint_stack { - - my $i_start = shift; - if ( $i_start < 0 ) { - $i_start = 0; - my ( $a, $b, $c ) = caller(); - warning( -"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " - ); - } - - while ( $forced_breakpoint_undo_count > $i_start ) { - my $i = - $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; - if ( $i >= 0 && $i <= $max_index_to_go ) { - $forced_breakpoint_to_go[$i] = 0; - $forced_breakpoint_count--; - - FORMATTER_DEBUG_FLAG_UNDOBP && do { - my ( $a, $b, $c ) = caller(); - print( -"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n" - ); - }; - } - - # shouldn't happen, but not a critical error - else { - FORMATTER_DEBUG_FLAG_UNDOBP && do { - my ( $a, $b, $c ) = caller(); - print( -"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go" - ); - }; - } - } -} - -{ # begin recombine_breakpoints - - my %is_amp_amp; - my %is_ternary; - my %is_math_op; - - BEGIN { - - @_ = qw( && || ); - @is_amp_amp{@_} = (1) x scalar(@_); - - @_ = qw( ? : ); - @is_ternary{@_} = (1) x scalar(@_); - - @_ = qw( + - * / ); - @is_math_op{@_} = (1) x scalar(@_); - } - - sub recombine_breakpoints { - - # sub set_continuation_breaks is very liberal in setting line breaks - # for long lines, always setting breaks at good breakpoints, even - # when that creates small lines. Occasionally small line fragments - # are produced which would look better if they were combined. - # That's the task of this routine, recombine_breakpoints. - # - # $ri_beg = ref to array of BEGinning indexes of each line - # $ri_end = ref to array of ENDing indexes of each line - my ( $ri_beg, $ri_end ) = @_; - - my $more_to_do = 1; - - # We keep looping over all of the lines of this batch - # until there are no more possible recombinations - my $nmax_last = @$ri_end; - while ($more_to_do) { - my $n_best = 0; - my $bs_best; - my $n; - my $nmax = @$ri_end - 1; - - # safety check for infinite loop - unless ( $nmax < $nmax_last ) { - - # shouldn't happen because splice below decreases nmax on each pass: - # but i get paranoid sometimes - die "Program bug-infinite loop in recombine breakpoints\n"; - } - $nmax_last = $nmax; - $more_to_do = 0; - my $previous_outdentable_closing_paren; - my $leading_amp_count = 0; - my $this_line_is_semicolon_terminated; - - # loop over all remaining lines in this batch - for $n ( 1 .. $nmax ) { - - #---------------------------------------------------------- - # If we join the current pair of lines, - # line $n-1 will become the left part of the joined line - # line $n will become the right part of the joined line - # - # Here are Indexes of the endpoint tokens of the two lines: - # - # -----line $n-1--- | -----line $n----- - # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 - # ^ - # | - # We want to decide if we should remove the line break - # betwen the tokens at $iend_1 and $ibeg_2 - # - # We will apply a number of ad-hoc tests to see if joining - # here will look ok. The code will just issue a 'next' - # command if the join doesn't look good. If we get through - # the gauntlet of tests, the lines will be recombined. - #---------------------------------------------------------- - # - # beginning and ending tokens of the lines we are working on - my $ibeg_1 = $$ri_beg[ $n - 1 ]; - my $iend_1 = $$ri_end[ $n - 1 ]; - my $iend_2 = $$ri_end[$n]; - my $ibeg_2 = $$ri_beg[$n]; - - my $ibeg_nmax = $$ri_beg[$nmax]; - - # some beginning indexes of other lines, which may not exist - my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; - my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; - my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; - - my $bs_tweak = 0; - - #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - - # $nesting_depth_to_go[$ibeg_1] ); - -##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n"; - - # If line $n is the last line, we set some flags and - # do any special checks for it - if ( $n == $nmax ) { - - # a terminal '{' should stay where it is - next if $types_to_go[$ibeg_2] eq '{'; - - # set flag if statement $n ends in ';' - $this_line_is_semicolon_terminated = - $types_to_go[$iend_2] eq ';' - - # with possible side comment - || ( $types_to_go[$iend_2] eq '#' - && $iend_2 - $ibeg_2 >= 2 - && $types_to_go[ $iend_2 - 2 ] eq ';' - && $types_to_go[ $iend_2 - 1 ] eq 'b' ); - } - - #---------------------------------------------------------- - # Section 1: examine token at $iend_1 (right end of first line - # of pair) - #---------------------------------------------------------- - - # an isolated '}' may join with a ';' terminated segment - if ( $types_to_go[$iend_1] eq '}' ) { - - # Check for cases where combining a semicolon terminated - # statement with a previous isolated closing paren will - # allow the combined line to be outdented. This is - # generally a good move. For example, we can join up - # the last two lines here: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) - # = stat($file); - # - # to get: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) = stat($file); - # - # which makes the parens line up. - # - # Another example, from Joe Matarazzo, probably looks best - # with the 'or' clause appended to the trailing paren: - # $self->some_method( - # PARAM1 => 'foo', - # PARAM2 => 'bar' - # ) or die "Some_method didn't work"; - # - $previous_outdentable_closing_paren = - $this_line_is_semicolon_terminated # ends in ';' - && $ibeg_1 == $iend_1 # only one token on last line - && $tokens_to_go[$iend_1] eq - ')' # must be structural paren - - # only &&, ||, and : if no others seen - # (but note: our count made below could be wrong - # due to intervening comments) - && ( $leading_amp_count == 0 - || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ ) - - # but leading colons probably line up with with a - # previous colon or question (count could be wrong). - && $types_to_go[$ibeg_2] ne ':' - - # only one step in depth allowed. this line must not - # begin with a ')' itself. - && ( $nesting_depth_to_go[$iend_1] == - $nesting_depth_to_go[$iend_2] + 1 ); - - next - unless ( - $previous_outdentable_closing_paren - - # handle '.' and '?' specially below - || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ ) - ); - } - - # do not recombine lines with ending &&, ||, - elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) { - next unless $want_break_before{ $types_to_go[$iend_1] }; - } - - # keep a terminal colon - elsif ( $types_to_go[$iend_1] eq ':' ) { - next unless $want_break_before{ $types_to_go[$iend_1] }; - } - - # Identify and recombine a broken ?/: chain - elsif ( $types_to_go[$iend_1] eq '?' ) { - - # Do not recombine different levels - next - if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); - - # do not recombine unless next line ends in : - next unless $types_to_go[$iend_2] eq ':'; - } - - # for lines ending in a comma... - elsif ( $types_to_go[$iend_1] eq ',' ) { - - # Do not recombine at comma which is following the - # input bias. - # TODO: might be best to make a special flag - next if ( $old_breakpoint_to_go[$iend_1] ); - - # an isolated '},' may join with an identifier + ';' - # this is useful for the class of a 'bless' statement (bless.t) - if ( $types_to_go[$ibeg_1] eq '}' - && $types_to_go[$ibeg_2] eq 'i' ) - { - next - unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) - && ( $iend_2 == ( $ibeg_2 + 1 ) ) - && $this_line_is_semicolon_terminated ); - - # override breakpoint - $forced_breakpoint_to_go[$iend_1] = 0; - } - - # but otherwise .. - else { - - # do not recombine after a comma unless this will leave - # just 1 more line - next unless ( $n + 1 >= $nmax ); - - # do not recombine if there is a change in indentation depth - next - if ( - $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); - - # do not recombine a "complex expression" after a - # comma. "complex" means no parens. - my $saw_paren; - foreach my $ii ( $ibeg_2 .. $iend_2 ) { - if ( $tokens_to_go[$ii] eq '(' ) { - $saw_paren = 1; - last; - } - } - next if $saw_paren; - } - } - - # opening paren.. - elsif ( $types_to_go[$iend_1] eq '(' ) { - - # No longer doing this - } - - elsif ( $types_to_go[$iend_1] eq ')' ) { - - # No longer doing this - } - - # keep a terminal for-semicolon - elsif ( $types_to_go[$iend_1] eq 'f' ) { - next; - } - - # if '=' at end of line ... - elsif ( $is_assignment{ $types_to_go[$iend_1] } ) { - - my $is_short_quote = - ( $types_to_go[$ibeg_2] eq 'Q' - && $ibeg_2 == $iend_2 - && length( $tokens_to_go[$ibeg_2] ) < - $rOpts_short_concatenation_item_length ); - my $is_ternary = - ( $types_to_go[$ibeg_1] eq '?' - && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); - - # always join an isolated '=', a short quote, or if this - # will put ?/: at start of adjacent lines - if ( $ibeg_1 != $iend_1 - && !$is_short_quote - && !$is_ternary ) - { - next - unless ( - ( - - # unless we can reduce this to two lines - $nmax < $n + 2 - - # or three lines, the last with a leading semicolon - || ( $nmax == $n + 2 - && $types_to_go[$ibeg_nmax] eq ';' ) - - # or the next line ends with a here doc - || $types_to_go[$iend_2] eq 'h' - - # or the next line ends in an open paren or brace - # and the break hasn't been forced [dima.t] - || ( !$forced_breakpoint_to_go[$iend_1] - && $types_to_go[$iend_2] eq '{' ) - ) - - # do not recombine if the two lines might align well - # this is a very approximate test for this - && ( $ibeg_3 >= 0 - && $types_to_go[$ibeg_2] ne - $types_to_go[$ibeg_3] ) - ); - - # -lp users often prefer this: - # my $title = function($env, $env, $sysarea, - # "bubba Borrower Entry"); - # so we will recombine if -lp is used we have ending - # comma - if ( !$rOpts_line_up_parentheses - || $types_to_go[$iend_2] ne ',' ) - { - - # otherwise, scan the rhs line up to last token for - # complexity. Note that we are not counting the last - # token in case it is an opening paren. - my $tv = 0; - my $depth = $nesting_depth_to_go[$ibeg_2]; - for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 1 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # ok to recombine if no level changes before last token - if ( $tv > 0 ) { - - # otherwise, do not recombine if more than two - # level changes. - next if ( $tv > 1 ); - - # check total complexity of the two adjacent lines - # that will occur if we do this join - my $istop = - ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; - for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 2 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # do not recombine if total is more than 2 level changes - next if ( $tv > 2 ); - } - } - } - - unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { - $forced_breakpoint_to_go[$iend_1] = 0; - } - } - - # for keywords.. - elsif ( $types_to_go[$iend_1] eq 'k' ) { - - # make major control keywords stand out - # (recombine.t) - next - if ( - - #/^(last|next|redo|return)$/ - $is_last_next_redo_return{ $tokens_to_go[$iend_1] } - - # but only if followed by multiple lines - && $n < $nmax - ); - - if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { - next - unless $want_break_before{ $tokens_to_go[$iend_1] }; - } - } - - # handle trailing + - * / - elsif ( $is_math_op{ $types_to_go[$iend_1] } ) { - - # combine lines if next line has single number - # or a short term followed by same operator - my $i_next_nonblank = $ibeg_2; - my $i_next_next = $i_next_nonblank + 1; - $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); - my $number_follows = $types_to_go[$i_next_nonblank] eq 'n' - && ( - $i_next_nonblank == $iend_2 - || ( $i_next_next == $iend_2 - && $is_math_op{ $types_to_go[$i_next_next] } ) - || $types_to_go[$i_next_next] eq ';' - ); - - # find token before last operator of previous line - my $iend_1_minus = $iend_1; - $iend_1_minus-- - if ( $iend_1_minus > $ibeg_1 ); - $iend_1_minus-- - if ( $types_to_go[$iend_1_minus] eq 'b' - && $iend_1_minus > $ibeg_1 ); - - my $short_term_follows = - ( $types_to_go[$iend_2] eq $types_to_go[$iend_1] - && $types_to_go[$iend_1_minus] =~ /^[in]$/ - && $iend_2 <= $ibeg_2 + 2 - && length( $tokens_to_go[$ibeg_2] ) < - $rOpts_short_concatenation_item_length ); - - next - unless ( $number_follows || $short_term_follows ); - } - - #---------------------------------------------------------- - # Section 2: Now examine token at $ibeg_2 (left end of second - # line of pair) - #---------------------------------------------------------- - - # join lines identified above as capable of - # causing an outdented line with leading closing paren - if ($previous_outdentable_closing_paren) { - $forced_breakpoint_to_go[$iend_1] = 0; - } - - # do not recombine lines with leading : - elsif ( $types_to_go[$ibeg_2] eq ':' ) { - $leading_amp_count++; - next if $want_break_before{ $types_to_go[$ibeg_2] }; - } - - # handle lines with leading &&, || - elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { - - $leading_amp_count++; - - # ok to recombine if it follows a ? or : - # and is followed by an open paren.. - my $ok = - ( $is_ternary{ $types_to_go[$ibeg_1] } - && $tokens_to_go[$iend_2] eq '(' ) - - # or is followed by a ? or : at same depth - # - # We are looking for something like this. We can - # recombine the && line with the line above to make the - # structure more clear: - # return - # exists $G->{Attr}->{V} - # && exists $G->{Attr}->{V}->{$u} - # ? %{ $G->{Attr}->{V}->{$u} } - # : (); - # - # We should probably leave something like this alone: - # return - # exists $G->{Attr}->{E} - # && exists $G->{Attr}->{E}->{$u} - # && exists $G->{Attr}->{E}->{$u}->{$v} - # ? %{ $G->{Attr}->{E}->{$u}->{$v} } - # : (); - # so that we either have all of the &&'s (or ||'s) - # on one line, as in the first example, or break at - # each one as in the second example. However, it - # sometimes makes things worse to check for this because - # it prevents multiple recombinations. So this is not done. - || ( $ibeg_3 >= 0 - && $is_ternary{ $types_to_go[$ibeg_3] } - && $nesting_depth_to_go[$ibeg_3] == - $nesting_depth_to_go[$ibeg_2] ); - - next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] }; - $forced_breakpoint_to_go[$iend_1] = 0; - - # tweak the bond strength to give this joint priority - # over ? and : - $bs_tweak = 0.25; - } - - # Identify and recombine a broken ?/: chain - elsif ( $types_to_go[$ibeg_2] eq '?' ) { - - # Do not recombine different levels - my $lev = $levels_to_go[$ibeg_2]; - next if ( $lev ne $levels_to_go[$ibeg_1] ); - - # Do not recombine a '?' if either next line or - # previous line does not start with a ':'. The reasons - # are that (1) no alignment of the ? will be possible - # and (2) the expression is somewhat complex, so the - # '?' is harder to see in the interior of the line. - my $follows_colon = - $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':'; - my $precedes_colon = - $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; - next unless ( $follows_colon || $precedes_colon ); - - # we will always combining a ? line following a : line - if ( !$follows_colon ) { - - # ...otherwise recombine only if it looks like a chain. - # we will just look at a few nearby lines to see if - # this looks like a chain. - my $local_count = 0; - foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { - $local_count++ - if $ii >= 0 - && $types_to_go[$ii] eq ':' - && $levels_to_go[$ii] == $lev; - } - next unless ( $local_count > 1 ); - } - $forced_breakpoint_to_go[$iend_1] = 0; - } - - # do not recombine lines with leading '.' - elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) { - my $i_next_nonblank = $ibeg_2 + 1; - if ( $types_to_go[$i_next_nonblank] eq 'b' ) { - $i_next_nonblank++; - } - - next - unless ( - - # ... unless there is just one and we can reduce - # this to two lines if we do. For example, this - # - # - # $bodyA .= - # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' - # - # looks better than this: - # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' - # . '$args .= $pat;' - - ( - $n == 2 - && $n == $nmax - && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] - ) - - # ... or this would strand a short quote , like this - # . "some long qoute" - # . "\n"; - || ( $types_to_go[$i_next_nonblank] eq 'Q' - && $i_next_nonblank >= $iend_2 - 1 - && length( $tokens_to_go[$i_next_nonblank] ) < - $rOpts_short_concatenation_item_length ) - ); - } - - # handle leading keyword.. - elsif ( $types_to_go[$ibeg_2] eq 'k' ) { - - # handle leading "or" - if ( $tokens_to_go[$ibeg_2] eq 'or' ) { - next - unless ( - $this_line_is_semicolon_terminated - && ( - - # following 'if' or 'unless' or 'or' - $types_to_go[$ibeg_1] eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } - - # important: only combine a very simple or - # statement because the step below may have - # combined a trailing 'and' with this or, - # and we do not want to then combine - # everything together - && ( $iend_2 - $ibeg_2 <= 7 ) - ) - ); - } - - # handle leading 'and' - elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { - - # Decide if we will combine a single terminal 'and' - # after an 'if' or 'unless'. - - # This looks best with the 'and' on the same - # line as the 'if': - # - # $a = 1 - # if $seconds and $nu < 2; - # - # But this looks better as shown: - # - # $a = 1 - # if !$this->{Parents}{$_} - # or $this->{Parents}{$_} eq $_; - # - next - unless ( - $this_line_is_semicolon_terminated - && ( - - # following 'if' or 'unless' or 'or' - $types_to_go[$ibeg_1] eq 'k' - && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } - || $tokens_to_go[$ibeg_1] eq 'or' ) - ) - ); - } - - # handle leading "if" and "unless" - elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { - - # FIXME: This is still experimental..may not be too useful - next - unless ( - $this_line_is_semicolon_terminated - - # previous line begins with 'and' or 'or' - && $types_to_go[$ibeg_1] eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_1] } - - ); - } - - # handle all other leading keywords - else { - - # keywords look best at start of lines, - # but combine things like "1 while" - unless ( $is_assignment{ $types_to_go[$iend_1] } ) { - next - if ( ( $types_to_go[$iend_1] ne 'k' ) - && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); - } - } - } - - # similar treatment of && and || as above for 'and' and 'or': - # NOTE: This block of code is currently bypassed because - # of a previous block but is retained for possible future use. - elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { - - # maybe looking at something like: - # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; - - next - unless ( - $this_line_is_semicolon_terminated - - # previous line begins with an 'if' or 'unless' keyword - && $types_to_go[$ibeg_1] eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } - - ); - } - - # handle leading + - * / - elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) { - my $i_next_nonblank = $ibeg_2 + 1; - if ( $types_to_go[$i_next_nonblank] eq 'b' ) { - $i_next_nonblank++; - } - - my $i_next_next = $i_next_nonblank + 1; - $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); - - my $is_number = ( - $types_to_go[$i_next_nonblank] eq 'n' - && ( $i_next_nonblank >= $iend_2 - 1 - || $types_to_go[$i_next_next] eq ';' ) - ); - - my $iend_1_nonblank = - $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1; - my $iend_2_nonblank = - $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2; - - my $is_short_term = - ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1] - && $types_to_go[$iend_2_nonblank] =~ /^[in]$/ - && $types_to_go[$iend_1_nonblank] =~ /^[in]$/ - && $iend_2_nonblank <= $ibeg_2 + 2 - && length( $tokens_to_go[$iend_2_nonblank] ) < - $rOpts_short_concatenation_item_length ); - - # Combine these lines if this line is a single - # number, or if it is a short term with same - # operator as the previous line. For example, in - # the following code we will combine all of the - # short terms $A, $B, $C, $D, $E, $F, together - # instead of leaving them one per line: - # my $time = - # $A * $B * $C * $D * $E * $F * - # ( 2. * $eps * $sigma * $area ) * - # ( 1. / $tcold**3 - 1. / $thot**3 ); - # This can be important in math-intensive code. - next - unless ( - $is_number - || $is_short_term - - # or if we can reduce this to two lines if we do. - || ( $n == 2 - && $n == $nmax - && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] ) - ); - } - - # handle line with leading = or similar - elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) { - next unless $n == 1; - next - unless ( - - # unless we can reduce this to two lines - $nmax == 2 - - # or three lines, the last with a leading semicolon - || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) - - # or the next line ends with a here doc - || $types_to_go[$iend_2] eq 'h' - ); - } - - #---------------------------------------------------------- - # Section 3: - # Combine the lines if we arrive here and it is possible - #---------------------------------------------------------- - - # honor hard breakpoints - next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); - - my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; - - # combined line cannot be too long - next - if excess_line_length( $ibeg_1, $iend_2 ) > 0; - - # do not recombine if we would skip in indentation levels - if ( $n < $nmax ) { - my $if_next = $$ri_beg[ $n + 1 ]; - next - if ( - $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] - && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] - - # but an isolated 'if (' is undesirable - && !( - $n == 1 - && $iend_1 - $ibeg_1 <= 2 - && $types_to_go[$ibeg_1] eq 'k' - && $tokens_to_go[$ibeg_1] eq 'if' - && $tokens_to_go[$iend_1] ne '(' - ) - ); - } - - # honor no-break's - next if ( $bs == NO_BREAK ); - - # remember the pair with the greatest bond strength - if ( !$n_best ) { - $n_best = $n; - $bs_best = $bs; - } - else { - - if ( $bs > $bs_best ) { - $n_best = $n; - $bs_best = $bs; - } - } - } - - # recombine the pair with the greatest bond strength - if ($n_best) { - splice @$ri_beg, $n_best, 1; - splice @$ri_end, $n_best - 1, 1; - - # keep going if we are still making progress - $more_to_do++; - } - } - return ( $ri_beg, $ri_end ); - } -} # end recombine_breakpoints - -sub break_all_chain_tokens { - - # scan the current breakpoints looking for breaks at certain "chain - # operators" (. : && || + etc) which often occur repeatedly in a long - # statement. If we see a break at any one, break at all similar tokens - # within the same container. - # - my ( $ri_left, $ri_right ) = @_; - - my %saw_chain_type; - my %left_chain_type; - my %right_chain_type; - my %interior_chain_type; - my $nmax = @$ri_right - 1; - - # scan the left and right end tokens of all lines - my $count = 0; - for my $n ( 0 .. $nmax ) { - my $il = $$ri_left[$n]; - my $ir = $$ri_right[$n]; - my $typel = $types_to_go[$il]; - my $typer = $types_to_go[$ir]; - $typel = '+' if ( $typel eq '-' ); # treat + and - the same - $typer = '+' if ( $typer eq '-' ); - $typel = '*' if ( $typel eq '/' ); # treat * and / the same - $typer = '*' if ( $typer eq '/' ); - my $tokenl = $tokens_to_go[$il]; - my $tokenr = $tokens_to_go[$ir]; - - if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { - next if ( $typel eq '?' ); - push @{ $left_chain_type{$typel} }, $il; - $saw_chain_type{$typel} = 1; - $count++; - } - if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { - next if ( $typer eq '?' ); - push @{ $right_chain_type{$typer} }, $ir; - $saw_chain_type{$typer} = 1; - $count++; - } - } - return unless $count; - - # now look for any interior tokens of the same types - $count = 0; - for my $n ( 0 .. $nmax ) { - my $il = $$ri_left[$n]; - my $ir = $$ri_right[$n]; - for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { - my $type = $types_to_go[$i]; - $type = '+' if ( $type eq '-' ); - $type = '*' if ( $type eq '/' ); - if ( $saw_chain_type{$type} ) { - push @{ $interior_chain_type{$type} }, $i; - $count++; - } - } - } - return unless $count; - - # now make a list of all new break points - my @insert_list; - - # loop over all chain types - foreach my $type ( keys %saw_chain_type ) { - - # quit if just ONE continuation line with leading . For example-- - # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' - # . $contents; - last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); - - # loop over all interior chain tokens - foreach my $itest ( @{ $interior_chain_type{$type} } ) { - - # loop over all left end tokens of same type - if ( $left_chain_type{$type} ) { - next if $nobreak_to_go[ $itest - 1 ]; - foreach my $i ( @{ $left_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); - push @insert_list, $itest - 1; - - # Break at matching ? if this : is at a different level. - # For example, the ? before $THRf_DEAD in the following - # should get a break if its : gets a break. - # - # my $flags = - # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE - # : ( $_ & 4 ) ? $THRf_R_DETACHED - # : $THRf_R_JOINABLE; - if ( $type eq ':' - && $levels_to_go[$i] != $levels_to_go[$itest] ) - { - my $i_question = $mate_index_to_go[$itest]; - if ( $i_question > 0 ) { - push @insert_list, $i_question - 1; - } - } - last; - } - } - - # loop over all right end tokens of same type - if ( $right_chain_type{$type} ) { - next if $nobreak_to_go[$itest]; - foreach my $i ( @{ $right_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); - push @insert_list, $itest; - - # break at matching ? if this : is at a different level - if ( $type eq ':' - && $levels_to_go[$i] != $levels_to_go[$itest] ) - { - my $i_question = $mate_index_to_go[$itest]; - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } - } - last; - } - } - } - } - - # insert any new break points - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } -} - -sub break_equals { - - # Look for assignment operators that could use a breakpoint. - # For example, in the following snippet - # - # $HOME = $ENV{HOME} - # || $ENV{LOGDIR} - # || $pw[7] - # || die "no home directory for user $<"; - # - # we could break at the = to get this, which is a little nicer: - # $HOME = - # $ENV{HOME} - # || $ENV{LOGDIR} - # || $pw[7] - # || die "no home directory for user $<"; - # - # The logic here follows the logic in set_logical_padding, which - # will add the padding in the second line to improve alignment. - # - my ( $ri_left, $ri_right ) = @_; - my $nmax = @$ri_right - 1; - return unless ( $nmax >= 2 ); - - # scan the left ends of first two lines - my $tokbeg = ""; - my $depth_beg; - for my $n ( 1 .. 2 ) { - my $il = $$ri_left[$n]; - my $typel = $types_to_go[$il]; - my $tokenl = $tokens_to_go[$il]; - - my $has_leading_op = ( $tokenl =~ /^\w/ ) - ? $is_chain_operator{$tokenl} # + - * / : ? && || - : $is_chain_operator{$typel}; # and, or - return unless ($has_leading_op); - if ( $n > 1 ) { - return - unless ( $tokenl eq $tokbeg - && $nesting_depth_to_go[$il] eq $depth_beg ); - } - $tokbeg = $tokenl; - $depth_beg = $nesting_depth_to_go[$il]; - } - - # now look for any interior tokens of the same types - my $il = $$ri_left[0]; - my $ir = $$ri_right[0]; - - # now make a list of all new break points - my @insert_list; - for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { - my $type = $types_to_go[$i]; - if ( $is_assignment{$type} - && $nesting_depth_to_go[$i] eq $depth_beg ) - { - if ( $want_break_before{$type} ) { - push @insert_list, $i - 1; - } - else { - push @insert_list, $i; - } - } - } - - # Break after a 'return' followed by a chain of operators - # return ( $^O !~ /win32|dos/i ) - # && ( $^O ne 'VMS' ) - # && ( $^O ne 'OS2' ) - # && ( $^O ne 'MacOS' ); - # To give: - # return - # ( $^O !~ /win32|dos/i ) - # && ( $^O ne 'VMS' ) - # && ( $^O ne 'OS2' ) - # && ( $^O ne 'MacOS' ); - my $i = 0; - if ( $types_to_go[$i] eq 'k' - && $tokens_to_go[$i] eq 'return' - && $ir > $il - && $nesting_depth_to_go[$i] eq $depth_beg ) - { - push @insert_list, $i; - } - - return unless (@insert_list); - - # One final check... - # scan second and thrid lines and be sure there are no assignments - # we want to avoid breaking at an = to make something like this: - # unless ( $icon = - # $html_icons{"$type-$state"} - # or $icon = $html_icons{$type} - # or $icon = $html_icons{$state} ) - for my $n ( 1 .. 2 ) { - my $il = $$ri_left[$n]; - my $ir = $$ri_right[$n]; - for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) { - my $type = $types_to_go[$i]; - return - if ( $is_assignment{$type} - && $nesting_depth_to_go[$i] eq $depth_beg ); - } - } - - # ok, insert any new break point - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } -} - -sub insert_final_breaks { - - my ( $ri_left, $ri_right ) = @_; - - my $nmax = @$ri_right - 1; - - # scan the left and right end tokens of all lines - my $count = 0; - my $i_first_colon = -1; - for my $n ( 0 .. $nmax ) { - my $il = $$ri_left[$n]; - my $ir = $$ri_right[$n]; - my $typel = $types_to_go[$il]; - my $typer = $types_to_go[$ir]; - return if ( $typel eq '?' ); - return if ( $typer eq '?' ); - if ( $typel eq ':' ) { $i_first_colon = $il; last; } - elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } - } - - # For long ternary chains, - # if the first : we see has its # ? is in the interior - # of a preceding line, then see if there are any good - # breakpoints before the ?. - if ( $i_first_colon > 0 ) { - my $i_question = $mate_index_to_go[$i_first_colon]; - if ( $i_question > 0 ) { - my @insert_list; - for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { - my $token = $tokens_to_go[$ii]; - my $type = $types_to_go[$ii]; - - # For now, a good break is either a comma or a 'return'. - if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) - && in_same_container( $ii, $i_question ) ) - { - push @insert_list, $ii; - last; - } - } - - # insert any new break points - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - } - } -} - -sub in_same_container { - - # check to see if tokens at i1 and i2 are in the - # same container, and not separated by a comma, ? or : - my ( $i1, $i2 ) = @_; - my $type = $types_to_go[$i1]; - my $depth = $nesting_depth_to_go[$i1]; - return unless ( $nesting_depth_to_go[$i2] == $depth ); - if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } - - ########################################################### - # This is potentially a very slow routine and not critical. - # For safety just give up for large differences. - # See test file 'infinite_loop.txt' - # TODO: replace this loop with a data structure - ########################################################### - return if ( $i2-$i1 > 200 ); - - for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) { - next if ( $nesting_depth_to_go[$i] > $depth ); - return if ( $nesting_depth_to_go[$i] < $depth ); - - my $tok = $tokens_to_go[$i]; - $tok = ',' if $tok eq '=>'; # treat => same as , - - # Example: we would not want to break at any of these .'s - # : "$str" - if ( $type ne ':' ) { - return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; - } - else { - return if ( $tok =~ /^[\,]$/ ); - } - } - return 1; -} - -sub set_continuation_breaks { - - # Define an array of indexes for inserting newline characters to - # keep the line lengths below the maximum desired length. There is - # an implied break after the last token, so it need not be included. - - # Method: - # This routine is part of series of routines which adjust line - # lengths. It is only called if a statement is longer than the - # maximum line length, or if a preliminary scanning located - # desirable break points. Sub scan_list has already looked at - # these tokens and set breakpoints (in array - # $forced_breakpoint_to_go[$i]) where it wants breaks (for example - # after commas, after opening parens, and before closing parens). - # This routine will honor these breakpoints and also add additional - # breakpoints as necessary to keep the line length below the maximum - # requested. It bases its decision on where the 'bond strength' is - # lowest. - - # Output: returns references to the arrays: - # @i_first - # @i_last - # which contain the indexes $i of the first and last tokens on each - # line. - - # In addition, the array: - # $forced_breakpoint_to_go[$i] - # may be updated to be =1 for any index $i after which there must be - # a break. This signals later routines not to undo the breakpoint. - - my $saw_good_break = shift; - my @i_first = (); # the first index to output - my @i_last = (); # the last index to output - my @i_colon_breaks = (); # needed to decide if we have to break at ?'s - if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } - - set_bond_strengths(); - - my $imin = 0; - my $imax = $max_index_to_go; - if ( $types_to_go[$imin] eq 'b' ) { $imin++ } - if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - my $i_begin = $imin; # index for starting next iteration - - my $leading_spaces = leading_spaces_to_go($imin); - my $line_count = 0; - my $last_break_strength = NO_BREAK; - my $i_last_break = -1; - my $max_bias = 0.001; - my $tiny_bias = 0.0001; - my $leading_alignment_token = ""; - my $leading_alignment_type = ""; - - # see if any ?/:'s are in order - my $colons_in_order = 1; - my $last_tok = ""; - my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; - my $colon_count = @colon_list; - foreach (@colon_list) { - if ( $_ eq $last_tok ) { $colons_in_order = 0; last } - $last_tok = $_; - } - - # This is a sufficient but not necessary condition for colon chain - my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); - - #------------------------------------------------------- - # BEGINNING of main loop to set continuation breakpoints - # Keep iterating until we reach the end - #------------------------------------------------------- - while ( $i_begin <= $imax ) { - my $lowest_strength = NO_BREAK; - my $starting_sum = $lengths_to_go[$i_begin]; - my $i_lowest = -1; - my $i_test = -1; - my $lowest_next_token = ''; - my $lowest_next_type = 'b'; - my $i_lowest_next_nonblank = -1; - - #------------------------------------------------------- - # BEGINNING of inner loop to find the best next breakpoint - #------------------------------------------------------- - for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { - my $type = $types_to_go[$i_test]; - my $token = $tokens_to_go[$i_test]; - my $next_type = $types_to_go[ $i_test + 1 ]; - my $next_token = $tokens_to_go[ $i_test + 1 ]; - my $i_next_nonblank = - ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 ); - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - my $strength = $bond_strength_to_go[$i_test]; - my $must_break = 0; - - # FIXME: TESTING: Might want to be able to break after these - # force an immediate break at certain operators - # with lower level than the start of the line - if ( - ( - $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ - || ( $next_nonblank_type eq 'k' - && $next_nonblank_token =~ /^(and|or)$/ ) - ) - && ( $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_next_nonblank] ) - ) - { - set_forced_breakpoint($i_next_nonblank); - } - - if ( - - # Try to put a break where requested by scan_list - $forced_breakpoint_to_go[$i_test] - - # break between ) { in a continued line so that the '{' can - # be outdented - # See similar logic in scan_list which catches instances - # where a line is just something like ') {' - || ( $line_count - && ( $token eq ')' ) - && ( $next_nonblank_type eq '{' ) - && ($next_nonblank_block_type) - && !$rOpts->{'opening-brace-always-on-right'} ) - - # There is an implied forced break at a terminal opening brace - || ( ( $type eq '{' ) && ( $i_test == $imax ) ) - ) - { - - # Forced breakpoints must sometimes be overridden, for example - # because of a side comment causing a NO_BREAK. It is easier - # to catch this here than when they are set. - if ( $strength < NO_BREAK ) { - $strength = $lowest_strength - $tiny_bias; - $must_break = 1; - } - } - - # quit if a break here would put a good terminal token on - # the next line and we already have a possible break - if ( - !$must_break - && ( $next_nonblank_type =~ /^[\;\,]$/ ) - && ( - ( - $leading_spaces + - $lengths_to_go[ $i_next_nonblank + 1 ] - - $starting_sum - ) > $rOpts_maximum_line_length - ) - ) - { - last if ( $i_lowest >= 0 ); - } - - # Avoid a break which would strand a single punctuation - # token. For example, we do not want to strand a leading - # '.' which is followed by a long quoted string. - if ( - !$must_break - && ( $i_test == $i_begin ) - && ( $i_test < $imax ) - && ( $token eq $type ) - && ( - ( - $leading_spaces + - $lengths_to_go[ $i_test + 1 ] - - $starting_sum - ) <= $rOpts_maximum_line_length - ) - ) - { - $i_test++; - - if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) { - $i_test++; - } - redo; - } - - if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) - { - - # break at previous best break if it would have produced - # a leading alignment of certain common tokens, and it - # is different from the latest candidate break - last - if ($leading_alignment_type); - - # Force at least one breakpoint if old code had good - # break It is only called if a breakpoint is required or - # desired. This will probably need some adjustments - # over time. A goal is to try to be sure that, if a new - # side comment is introduced into formated text, then - # the same breakpoints will occur. scbreak.t - last - if ( - $i_test == $imax # we are at the end - && !$forced_breakpoint_count # - && $saw_good_break # old line had good break - && $type =~ /^[#;\{]$/ # and this line ends in - # ';' or side comment - && $i_last_break < 0 # and we haven't made a break - && $i_lowest > 0 # and we saw a possible break - && $i_lowest < $imax - 1 # (but not just before this ;) - && $strength - $lowest_strength < 0.5 * WEAK # and it's good - ); - - $lowest_strength = $strength; - $i_lowest = $i_test; - $lowest_next_token = $next_nonblank_token; - $lowest_next_type = $next_nonblank_type; - $i_lowest_next_nonblank = $i_next_nonblank; - last if $must_break; - - # set flags to remember if a break here will produce a - # leading alignment of certain common tokens - if ( $line_count > 0 - && $i_test < $imax - && ( $lowest_strength - $last_break_strength <= $max_bias ) - ) - { - my $i_last_end = $i_begin - 1; - if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 } - my $tok_beg = $tokens_to_go[$i_begin]; - my $type_beg = $types_to_go[$i_begin]; - if ( - - # check for leading alignment of certain tokens - ( - $tok_beg eq $next_nonblank_token - && $is_chain_operator{$tok_beg} - && ( $type_beg eq 'k' - || $type_beg eq $tok_beg ) - && $nesting_depth_to_go[$i_begin] >= - $nesting_depth_to_go[$i_next_nonblank] - ) - - || ( $tokens_to_go[$i_last_end] eq $token - && $is_chain_operator{$token} - && ( $type eq 'k' || $type eq $token ) - && $nesting_depth_to_go[$i_last_end] >= - $nesting_depth_to_go[$i_test] ) - ) - { - $leading_alignment_token = $next_nonblank_token; - $leading_alignment_type = $next_nonblank_type; - } - } - } - - my $too_long = - ( $i_test >= $imax ) - ? 1 - : ( - ( - $leading_spaces + - $lengths_to_go[ $i_test + 2 ] - - $starting_sum - ) > $rOpts_maximum_line_length - ); - - FORMATTER_DEBUG_FLAG_BREAK - && print -"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n"; - - # allow one extra terminal token after exceeding line length - # if it would strand this token. - if ( $rOpts_fuzzy_line_length - && $too_long - && ( $i_lowest == $i_test ) - && ( length($token) > 1 ) - && ( $next_nonblank_type =~ /^[\;\,]$/ ) ) - { - $too_long = 0; - } - - last - if ( - ( $i_test == $imax ) # we're done if no more tokens, - || ( - ( $i_lowest >= 0 ) # or no more space and we have a break - && $too_long - ) - ); - } - - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint - # Now decide exactly where to put the breakpoint - #------------------------------------------------------- - - # it's always ok to break at imax if no other break was found - if ( $i_lowest < 0 ) { $i_lowest = $imax } - - # semi-final index calculation - my $i_next_nonblank = ( - ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) - ? $i_lowest + 2 - : $i_lowest + 1 - ); - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - #------------------------------------------------------- - # ?/: rule 1 : if a break here will separate a '?' on this - # line from its closing ':', then break at the '?' instead. - #------------------------------------------------------- - my $i; - foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { - next unless ( $tokens_to_go[$i] eq '?' ); - - # do not break if probable sequence of ?/: statements - next if ($is_colon_chain); - - # do not break if statement is broken by side comment - next - if ( - $tokens_to_go[$max_index_to_go] eq '#' - && terminal_type( \@types_to_go, \@block_type_to_go, 0, - $max_index_to_go ) !~ /^[\;\}]$/ - ); - - # no break needed if matching : is also on the line - next - if ( $mate_index_to_go[$i] >= 0 - && $mate_index_to_go[$i] <= $i_next_nonblank ); - - $i_lowest = $i; - if ( $want_break_before{'?'} ) { $i_lowest-- } - last; - } - - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint: - # Break the line after the token with index i=$i_lowest - #------------------------------------------------------- - - # final index calculation - $i_next_nonblank = ( - ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) - ? $i_lowest + 2 - : $i_lowest + 1 - ); - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - FORMATTER_DEBUG_FLAG_BREAK - && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; - - #------------------------------------------------------- - # ?/: rule 2 : if we break at a '?', then break at its ':' - # - # Note: this rule is also in sub scan_list to handle a break - # at the start and end of a line (in case breaks are dictated - # by side comments). - #------------------------------------------------------- - if ( $next_nonblank_type eq '?' ) { - set_closing_breakpoint($i_next_nonblank); - } - elsif ( $types_to_go[$i_lowest] eq '?' ) { - set_closing_breakpoint($i_lowest); - } - - #------------------------------------------------------- - # ?/: rule 3 : if we break at a ':' then we save - # its location for further work below. We may need to go - # back and break at its '?'. - #------------------------------------------------------- - if ( $next_nonblank_type eq ':' ) { - push @i_colon_breaks, $i_next_nonblank; - } - elsif ( $types_to_go[$i_lowest] eq ':' ) { - push @i_colon_breaks, $i_lowest; - } - - # here we should set breaks for all '?'/':' pairs which are - # separated by this line - - $line_count++; - - # save this line segment, after trimming blanks at the ends - push( @i_first, - ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); - push( @i_last, - ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); - - # set a forced breakpoint at a container opening, if necessary, to - # signal a break at a closing container. Excepting '(' for now. - if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ - && !$forced_breakpoint_to_go[$i_lowest] ) - { - set_closing_breakpoint($i_lowest); - } - - # get ready to go again - $i_begin = $i_lowest + 1; - $last_break_strength = $lowest_strength; - $i_last_break = $i_lowest; - $leading_alignment_token = ""; - $leading_alignment_type = ""; - $lowest_next_token = ''; - $lowest_next_type = 'b'; - - if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { - $i_begin++; - } - - # update indentation size - if ( $i_begin <= $imax ) { - $leading_spaces = leading_spaces_to_go($i_begin); - } - } - - #------------------------------------------------------- - # END of main loop to set continuation breakpoints - # Now go back and make any necessary corrections - #------------------------------------------------------- - - #------------------------------------------------------- - # ?/: rule 4 -- if we broke at a ':', then break at - # corresponding '?' unless this is a chain of ?: expressions - #------------------------------------------------------- - if (@i_colon_breaks) { - - # using a simple method for deciding if we are in a ?/: chain -- - # this is a chain if it has multiple ?/: pairs all in order; - # otherwise not. - # Note that if line starts in a ':' we count that above as a break - my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); - - unless ($is_chain) { - my @insert_list = (); - foreach (@i_colon_breaks) { - my $i_question = $mate_index_to_go[$_]; - if ( $i_question >= 0 ) { - if ( $want_break_before{'?'} ) { - $i_question--; - if ( $i_question > 0 - && $types_to_go[$i_question] eq 'b' ) - { - $i_question--; - } - } - - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } - } - insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); - } - } - } - return ( \@i_first, \@i_last, $colon_count ); -} - -sub insert_additional_breaks { - - # this routine will add line breaks at requested locations after - # sub set_continuation_breaks has made preliminary breaks. - - my ( $ri_break_list, $ri_first, $ri_last ) = @_; - my $i_f; - my $i_l; - my $line_number = 0; - my $i_break_left; - foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) { - - $i_f = $$ri_first[$line_number]; - $i_l = $$ri_last[$line_number]; - while ( $i_break_left >= $i_l ) { - $line_number++; - - # shouldn't happen unless caller passes bad indexes - if ( $line_number >= @$ri_last ) { - warning( -"Non-fatal program bug: couldn't set break at $i_break_left\n" - ); - report_definite_bug(); - return; - } - $i_f = $$ri_first[$line_number]; - $i_l = $$ri_last[$line_number]; - } - - my $i_break_right = $i_break_left + 1; - if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ } - - if ( $i_break_left >= $i_f - && $i_break_left < $i_l - && $i_break_right > $i_f - && $i_break_right <= $i_l ) - { - splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); - splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); - } - } -} - -sub set_closing_breakpoint { - - # set a breakpoint at a matching closing token - # at present, this is only used to break at a ':' which matches a '?' - my $i_break = shift; - - if ( $mate_index_to_go[$i_break] >= 0 ) { - - # CAUTION: infinite recursion possible here: - # set_closing_breakpoint calls set_forced_breakpoint, and - # set_forced_breakpoint call set_closing_breakpoint - # ( test files attrib.t, BasicLyx.pm.html). - # Don't reduce the '2' in the statement below - if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { - - # break before } ] and ), but sub set_forced_breakpoint will decide - # to break before or after a ? and : - my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; - set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); - } - } - else { - my $type_sequence = $type_sequence_to_go[$i_break]; - if ($type_sequence) { - my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; - $postponed_breakpoint{$type_sequence} = 1; - } - } -} - -# check to see if output line tabbing agrees with input line -# this can be very useful for debugging a script which has an extra -# or missing brace -sub compare_indentation_levels { - - my ( $python_indentation_level, $structural_indentation_level ) = @_; - if ( ( $python_indentation_level ne $structural_indentation_level ) ) { - $last_tabbing_disagreement = $input_line_number; - - if ($in_tabbing_disagreement) { - } - else { - $tabbing_disagreement_count++; - - if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { - write_logfile_entry( -"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n" - ); - } - $in_tabbing_disagreement = $input_line_number; - $first_tabbing_disagreement = $in_tabbing_disagreement - unless ($first_tabbing_disagreement); - } - } - else { - - if ($in_tabbing_disagreement) { - - if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { - write_logfile_entry( -"End indentation disagreement from input line $in_tabbing_disagreement\n" - ); - - if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { - write_logfile_entry( - "No further tabbing disagreements will be noted\n"); - } - } - $in_tabbing_disagreement = 0; - } - } -} - -##################################################################### -# -# the Perl::Tidy::IndentationItem class supplies items which contain -# how much whitespace should be used at the start of a line -# -##################################################################### - -package Perl::Tidy::IndentationItem; - -# Indexes for indentation items -use constant SPACES => 0; # total leading white spaces -use constant LEVEL => 1; # the indentation 'level' -use constant CI_LEVEL => 2; # the 'continuation level' -use constant AVAILABLE_SPACES => 3; # how many left spaces available - # for this level -use constant CLOSED => 4; # index where we saw closing '}' -use constant COMMA_COUNT => 5; # how many commas at this level? -use constant SEQUENCE_NUMBER => 6; # output batch number -use constant INDEX => 7; # index in output batch list -use constant HAVE_CHILD => 8; # any dependents? -use constant RECOVERABLE_SPACES => 9; # how many spaces to the right - # we would like to move to get - # alignment (negative if left) -use constant ALIGN_PAREN => 10; # do we want to try to align - # with an opening structure? -use constant MARKED => 11; # if visited by corrector logic -use constant STACK_DEPTH => 12; # indentation nesting depth -use constant STARTING_INDEX => 13; # first token index of this level -use constant ARROW_COUNT => 14; # how many =>'s - -sub new { - - # Create an 'indentation_item' which describes one level of leading - # whitespace when the '-lp' indentation is used. We return - # a reference to an anonymous array of associated variables. - # See above constants for storage scheme. - my ( - $class, $spaces, $level, - $ci_level, $available_spaces, $index, - $gnu_sequence_number, $align_paren, $stack_depth, - $starting_index, - ) = @_; - my $closed = -1; - my $arrow_count = 0; - my $comma_count = 0; - my $have_child = 0; - my $want_right_spaces = 0; - my $marked = 0; - bless [ - $spaces, $level, $ci_level, - $available_spaces, $closed, $comma_count, - $gnu_sequence_number, $index, $have_child, - $want_right_spaces, $align_paren, $marked, - $stack_depth, $starting_index, $arrow_count, - ], $class; -} - -sub permanently_decrease_AVAILABLE_SPACES { - - # make a permanent reduction in the available indentation spaces - # at one indentation item. NOTE: if there are child nodes, their - # total SPACES must be reduced by the caller. - - my ( $item, $spaces_needed ) = @_; - my $available_spaces = $item->get_AVAILABLE_SPACES(); - my $deleted_spaces = - ( $available_spaces > $spaces_needed ) - ? $spaces_needed - : $available_spaces; - $item->decrease_AVAILABLE_SPACES($deleted_spaces); - $item->decrease_SPACES($deleted_spaces); - $item->set_RECOVERABLE_SPACES(0); - - return $deleted_spaces; -} - -sub tentatively_decrease_AVAILABLE_SPACES { - - # We are asked to tentatively delete $spaces_needed of indentation - # for a indentation item. We may want to undo this later. NOTE: if - # there are child nodes, their total SPACES must be reduced by the - # caller. - my ( $item, $spaces_needed ) = @_; - my $available_spaces = $item->get_AVAILABLE_SPACES(); - my $deleted_spaces = - ( $available_spaces > $spaces_needed ) - ? $spaces_needed - : $available_spaces; - $item->decrease_AVAILABLE_SPACES($deleted_spaces); - $item->decrease_SPACES($deleted_spaces); - $item->increase_RECOVERABLE_SPACES($deleted_spaces); - return $deleted_spaces; -} - -sub get_STACK_DEPTH { - my $self = shift; - return $self->[STACK_DEPTH]; -} - -sub get_SPACES { - my $self = shift; - return $self->[SPACES]; -} - -sub get_MARKED { - my $self = shift; - return $self->[MARKED]; -} - -sub set_MARKED { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[MARKED] = $value; - } - return $self->[MARKED]; -} - -sub get_AVAILABLE_SPACES { - my $self = shift; - return $self->[AVAILABLE_SPACES]; -} - -sub decrease_SPACES { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[SPACES] -= $value; - } - return $self->[SPACES]; -} - -sub decrease_AVAILABLE_SPACES { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[AVAILABLE_SPACES] -= $value; - } - return $self->[AVAILABLE_SPACES]; -} - -sub get_ALIGN_PAREN { - my $self = shift; - return $self->[ALIGN_PAREN]; -} - -sub get_RECOVERABLE_SPACES { - my $self = shift; - return $self->[RECOVERABLE_SPACES]; -} - -sub set_RECOVERABLE_SPACES { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[RECOVERABLE_SPACES] = $value; - } - return $self->[RECOVERABLE_SPACES]; -} - -sub increase_RECOVERABLE_SPACES { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[RECOVERABLE_SPACES] += $value; - } - return $self->[RECOVERABLE_SPACES]; -} - -sub get_CI_LEVEL { - my $self = shift; - return $self->[CI_LEVEL]; -} - -sub get_LEVEL { - my $self = shift; - return $self->[LEVEL]; -} - -sub get_SEQUENCE_NUMBER { - my $self = shift; - return $self->[SEQUENCE_NUMBER]; -} - -sub get_INDEX { - my $self = shift; - return $self->[INDEX]; -} - -sub get_STARTING_INDEX { - my $self = shift; - return $self->[STARTING_INDEX]; -} - -sub set_HAVE_CHILD { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[HAVE_CHILD] = $value; - } - return $self->[HAVE_CHILD]; -} - -sub get_HAVE_CHILD { - my $self = shift; - return $self->[HAVE_CHILD]; -} - -sub set_ARROW_COUNT { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[ARROW_COUNT] = $value; - } - return $self->[ARROW_COUNT]; -} - -sub get_ARROW_COUNT { - my $self = shift; - return $self->[ARROW_COUNT]; -} - -sub set_COMMA_COUNT { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[COMMA_COUNT] = $value; - } - return $self->[COMMA_COUNT]; -} - -sub get_COMMA_COUNT { - my $self = shift; - return $self->[COMMA_COUNT]; -} - -sub set_CLOSED { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->[CLOSED] = $value; - } - return $self->[CLOSED]; -} - -sub get_CLOSED { - my $self = shift; - return $self->[CLOSED]; -} - -##################################################################### -# -# the Perl::Tidy::VerticalAligner::Line class supplies an object to -# contain a single output line -# -##################################################################### - -package Perl::Tidy::VerticalAligner::Line; - -{ - - use strict; - use Carp; - - use constant JMAX => 0; - use constant JMAX_ORIGINAL_LINE => 1; - use constant RTOKENS => 2; - use constant RFIELDS => 3; - use constant RPATTERNS => 4; - use constant INDENTATION => 5; - use constant LEADING_SPACE_COUNT => 6; - use constant OUTDENT_LONG_LINES => 7; - use constant LIST_TYPE => 8; - use constant IS_HANGING_SIDE_COMMENT => 9; - use constant RALIGNMENTS => 10; - use constant MAXIMUM_LINE_LENGTH => 11; - use constant RVERTICAL_TIGHTNESS_FLAGS => 12; - - my %_index_map; - $_index_map{jmax} = JMAX; - $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; - $_index_map{rtokens} = RTOKENS; - $_index_map{rfields} = RFIELDS; - $_index_map{rpatterns} = RPATTERNS; - $_index_map{indentation} = INDENTATION; - $_index_map{leading_space_count} = LEADING_SPACE_COUNT; - $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; - $_index_map{list_type} = LIST_TYPE; - $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; - $_index_map{ralignments} = RALIGNMENTS; - $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; - $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS; - - my @_default_data = (); - $_default_data[JMAX] = undef; - $_default_data[JMAX_ORIGINAL_LINE] = undef; - $_default_data[RTOKENS] = undef; - $_default_data[RFIELDS] = undef; - $_default_data[RPATTERNS] = undef; - $_default_data[INDENTATION] = undef; - $_default_data[LEADING_SPACE_COUNT] = undef; - $_default_data[OUTDENT_LONG_LINES] = undef; - $_default_data[LIST_TYPE] = undef; - $_default_data[IS_HANGING_SIDE_COMMENT] = undef; - $_default_data[RALIGNMENTS] = []; - $_default_data[MAXIMUM_LINE_LENGTH] = undef; - $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef; - - { - - # methods to count object population - my $_count = 0; - sub get_count { $_count; } - sub _increment_count { ++$_count } - sub _decrement_count { --$_count } - } - - # Constructor may be called as a class method - sub new { - my ( $caller, %arg ) = @_; - my $caller_is_obj = ref($caller); - my $class = $caller_is_obj || $caller; - no strict "refs"; - my $self = bless [], $class; - - $self->[RALIGNMENTS] = []; - - my $index; - foreach ( keys %_index_map ) { - $index = $_index_map{$_}; - if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } - elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } - else { $self->[$index] = $_default_data[$index] } - } - - $self->_increment_count(); - return $self; - } - - sub DESTROY { - $_[0]->_decrement_count(); - } - - sub get_jmax { $_[0]->[JMAX] } - sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } - sub get_rtokens { $_[0]->[RTOKENS] } - sub get_rfields { $_[0]->[RFIELDS] } - sub get_rpatterns { $_[0]->[RPATTERNS] } - sub get_indentation { $_[0]->[INDENTATION] } - sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } - sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } - sub get_list_type { $_[0]->[LIST_TYPE] } - sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } - sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] } - - sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } - sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } - sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } - sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } - - sub get_starting_column { - $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); - } - - sub increment_column { - $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); - } - sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } - - sub current_field_width { - my $self = shift; - my ($j) = @_; - if ( $j == 0 ) { - return $self->get_column($j); - } - else { - return $self->get_column($j) - $self->get_column( $j - 1 ); - } - } - - sub field_width_growth { - my $self = shift; - my $j = shift; - return $self->get_column($j) - $self->get_starting_column($j); - } - - sub starting_field_width { - my $self = shift; - my $j = shift; - if ( $j == 0 ) { - return $self->get_starting_column($j); - } - else { - return $self->get_starting_column($j) - - $self->get_starting_column( $j - 1 ); - } - } - - sub increase_field_width { - - my $self = shift; - my ( $j, $pad ) = @_; - my $jmax = $self->get_jmax(); - for my $k ( $j .. $jmax ) { - $self->increment_column( $k, $pad ); - } - } - - sub get_available_space_on_right { - my $self = shift; - my $jmax = $self->get_jmax(); - return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); - } - - sub set_jmax { $_[0]->[JMAX] = $_[1] } - sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } - sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } - sub set_rfields { $_[0]->[RFIELDS] = $_[1] } - sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } - sub set_indentation { $_[0]->[INDENTATION] = $_[1] } - sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } - sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } - sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } - sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } - sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } - -} - -##################################################################### -# -# the Perl::Tidy::VerticalAligner::Alignment class holds information -# on a single column being aligned -# -##################################################################### -package Perl::Tidy::VerticalAligner::Alignment; - -{ - - use strict; - - #use Carp; - - # Symbolic array indexes - use constant COLUMN => 0; # the current column number - use constant STARTING_COLUMN => 1; # column number when created - use constant MATCHING_TOKEN => 2; # what token we are matching - use constant STARTING_LINE => 3; # the line index of creation - use constant ENDING_LINE => 4; # the most recent line to use it - use constant SAVED_COLUMN => 5; # the most recent line to use it - use constant SERIAL_NUMBER => 6; # unique number for this alignment - # (just its index in an array) - - # Correspondence between variables and array indexes - my %_index_map; - $_index_map{column} = COLUMN; - $_index_map{starting_column} = STARTING_COLUMN; - $_index_map{matching_token} = MATCHING_TOKEN; - $_index_map{starting_line} = STARTING_LINE; - $_index_map{ending_line} = ENDING_LINE; - $_index_map{saved_column} = SAVED_COLUMN; - $_index_map{serial_number} = SERIAL_NUMBER; - - my @_default_data = (); - $_default_data[COLUMN] = undef; - $_default_data[STARTING_COLUMN] = undef; - $_default_data[MATCHING_TOKEN] = undef; - $_default_data[STARTING_LINE] = undef; - $_default_data[ENDING_LINE] = undef; - $_default_data[SAVED_COLUMN] = undef; - $_default_data[SERIAL_NUMBER] = undef; - - # class population count - { - my $_count = 0; - sub get_count { $_count; } - sub _increment_count { ++$_count } - sub _decrement_count { --$_count } - } - - # constructor - sub new { - my ( $caller, %arg ) = @_; - my $caller_is_obj = ref($caller); - my $class = $caller_is_obj || $caller; - no strict "refs"; - my $self = bless [], $class; - - foreach ( keys %_index_map ) { - my $index = $_index_map{$_}; - if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } - elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } - else { $self->[$index] = $_default_data[$index] } - } - $self->_increment_count(); - return $self; - } - - sub DESTROY { - $_[0]->_decrement_count(); - } - - sub get_column { return $_[0]->[COLUMN] } - sub get_starting_column { return $_[0]->[STARTING_COLUMN] } - sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } - sub get_starting_line { return $_[0]->[STARTING_LINE] } - sub get_ending_line { return $_[0]->[ENDING_LINE] } - sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } - - sub set_column { $_[0]->[COLUMN] = $_[1] } - sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } - sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } - sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } - sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } - sub increment_column { $_[0]->[COLUMN] += $_[1] } - - sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } - sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } - -} - -package Perl::Tidy::VerticalAligner; - -# The Perl::Tidy::VerticalAligner package collects output lines and -# attempts to line up certain common tokens, such as => and #, which are -# identified by the calling routine. -# -# There are two main routines: append_line and flush. Append acts as a -# storage buffer, collecting lines into a group which can be vertically -# aligned. When alignment is no longer possible or desirable, it dumps -# the group to flush. -# -# append_line -----> flush -# -# collects writes -# vertical one -# groups group - -BEGIN { - - # Caution: these debug flags produce a lot of output - # They should all be 0 except when debugging small scripts - - use constant VALIGN_DEBUG_FLAG_APPEND => 0; - use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; - use constant VALIGN_DEBUG_FLAG_TERNARY => 0; - - my $debug_warning = sub { - print "VALIGN_DEBUGGING with key $_[0]\n"; - }; - - VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); - VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); - -} - -use vars qw( - $vertical_aligner_self - $current_line - $maximum_alignment_index - $ralignment_list - $maximum_jmax_seen - $minimum_jmax_seen - $previous_minimum_jmax_seen - $previous_maximum_jmax_seen - $maximum_line_index - $group_level - $group_type - $group_maximum_gap - $marginal_match - $last_group_level_written - $last_leading_space_count - $extra_indent_ok - $zero_count - @group_lines - $last_comment_column - $last_side_comment_line_number - $last_side_comment_length - $last_side_comment_level - $outdented_line_count - $first_outdented_line_at - $last_outdented_line_at - $diagnostics_object - $logger_object - $file_writer_object - @side_comment_history - $comment_leading_space_count - $is_matching_terminal_line - - $cached_line_text - $cached_line_type - $cached_line_flag - $cached_seqno - $cached_line_valid - $cached_line_leading_space_count - $cached_seqno_string - - $seqno_string - $last_nonblank_seqno_string - - $rOpts - - $rOpts_maximum_line_length - $rOpts_continuation_indentation - $rOpts_indent_columns - $rOpts_tabs - $rOpts_entab_leading_whitespace - $rOpts_valign - - $rOpts_fixed_position_side_comment - $rOpts_minimum_space_to_comment - -); - -sub initialize { - - my $class; - - ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) - = @_; - - # variables describing the entire space group: - $ralignment_list = []; - $group_level = 0; - $last_group_level_written = -1; - $extra_indent_ok = 0; # can we move all lines to the right? - $last_side_comment_length = 0; - $maximum_jmax_seen = 0; - $minimum_jmax_seen = 0; - $previous_minimum_jmax_seen = 0; - $previous_maximum_jmax_seen = 0; - - # variables describing each line of the group - @group_lines = (); # list of all lines in group - - $outdented_line_count = 0; - $first_outdented_line_at = 0; - $last_outdented_line_at = 0; - $last_side_comment_line_number = 0; - $last_side_comment_level = -1; - $is_matching_terminal_line = 0; - - # most recent 3 side comments; [ line number, column ] - $side_comment_history[0] = [ -300, 0 ]; - $side_comment_history[1] = [ -200, 0 ]; - $side_comment_history[2] = [ -100, 0 ]; - - # write_leader_and_string cache: - $cached_line_text = ""; - $cached_line_type = 0; - $cached_line_flag = 0; - $cached_seqno = 0; - $cached_line_valid = 0; - $cached_line_leading_space_count = 0; - $cached_seqno_string = ""; - - # string of sequence numbers joined together - $seqno_string = ""; - $last_nonblank_seqno_string = ""; - - # frequently used parameters - $rOpts_indent_columns = $rOpts->{'indent-columns'}; - $rOpts_tabs = $rOpts->{'tabs'}; - $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; - $rOpts_fixed_position_side_comment = - $rOpts->{'fixed-position-side-comment'}; - $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; - $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_valign = $rOpts->{'valign'}; - - forget_side_comment(); - - initialize_for_new_group(); - - $vertical_aligner_self = {}; - bless $vertical_aligner_self, $class; - return $vertical_aligner_self; -} - -sub initialize_for_new_group { - $maximum_line_index = -1; # lines in the current group - $maximum_alignment_index = -1; # alignments in current group - $zero_count = 0; # count consecutive lines without tokens - $current_line = undef; # line being matched for alignment - $group_maximum_gap = 0; # largest gap introduced - $group_type = ""; - $marginal_match = 0; - $comment_leading_space_count = 0; - $last_leading_space_count = 0; -} - -# interface to Perl::Tidy::Diagnostics routines -sub write_diagnostics { - if ($diagnostics_object) { - $diagnostics_object->write_diagnostics(@_); - } -} - -# interface to Perl::Tidy::Logger routines -sub warning { - if ($logger_object) { - $logger_object->warning(@_); - } -} - -sub write_logfile_entry { - if ($logger_object) { - $logger_object->write_logfile_entry(@_); - } -} - -sub report_definite_bug { - if ($logger_object) { - $logger_object->report_definite_bug(); - } -} - -sub get_SPACES { - - # return the number of leading spaces associated with an indentation - # variable $indentation is either a constant number of spaces or an - # object with a get_SPACES method. - my $indentation = shift; - return ref($indentation) ? $indentation->get_SPACES() : $indentation; -} - -sub get_RECOVERABLE_SPACES { - - # return the number of spaces (+ means shift right, - means shift left) - # that we would like to shift a group of lines with the same indentation - # to get them to line up with their opening parens - my $indentation = shift; - return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; -} - -sub get_STACK_DEPTH { - - my $indentation = shift; - return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; -} - -sub make_alignment { - my ( $col, $token ) = @_; - - # make one new alignment at column $col which aligns token $token - ++$maximum_alignment_index; - my $alignment = new Perl::Tidy::VerticalAligner::Alignment( - column => $col, - starting_column => $col, - matching_token => $token, - starting_line => $maximum_line_index, - ending_line => $maximum_line_index, - serial_number => $maximum_alignment_index, - ); - $ralignment_list->[$maximum_alignment_index] = $alignment; - return $alignment; -} - -sub dump_alignments { - print -"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; - for my $i ( 0 .. $maximum_alignment_index ) { - my $column = $ralignment_list->[$i]->get_column(); - my $starting_column = $ralignment_list->[$i]->get_starting_column(); - my $matching_token = $ralignment_list->[$i]->get_matching_token(); - my $starting_line = $ralignment_list->[$i]->get_starting_line(); - my $ending_line = $ralignment_list->[$i]->get_ending_line(); - print -"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; - } -} - -sub save_alignment_columns { - for my $i ( 0 .. $maximum_alignment_index ) { - $ralignment_list->[$i]->save_column(); - } -} - -sub restore_alignment_columns { - for my $i ( 0 .. $maximum_alignment_index ) { - $ralignment_list->[$i]->restore_column(); - } -} - -sub forget_side_comment { - $last_comment_column = 0; -} - -sub append_line { - - # sub append is called to place one line in the current vertical group. - # - # The input parameters are: - # $level = indentation level of this line - # $rfields = reference to array of fields - # $rpatterns = reference to array of patterns, one per field - # $rtokens = reference to array of tokens starting fields 1,2,.. - # - # Here is an example of what this package does. In this example, - # we are trying to line up both the '=>' and the '#'. - # - # '18' => 'grave', # \` - # '19' => 'acute', # `' - # '20' => 'caron', # \v - # <-tabs-><--field 2 ---><-f3-> - # | | | | - # | | | | - # col1 col2 col3 col4 - # - # The calling routine has already broken the entire line into 3 fields as - # indicated. (So the work of identifying promising common tokens has - # already been done). - # - # In this example, there will be 2 tokens being matched: '=>' and '#'. - # They are the leading parts of fields 2 and 3, but we do need to know - # what they are so that we can dump a group of lines when these tokens - # change. - # - # The fields contain the actual characters of each field. The patterns - # are like the fields, but they contain mainly token types instead - # of tokens, so they have fewer characters. They are used to be - # sure we are matching fields of similar type. - # - # In this example, there will be 4 column indexes being adjusted. The - # first one is always at zero. The interior columns are at the start of - # the matching tokens, and the last one tracks the maximum line length. - # - # Basically, each time a new line comes in, it joins the current vertical - # group if possible. Otherwise it causes the current group to be dumped - # and a new group is started. - # - # For each new group member, the column locations are increased, as - # necessary, to make room for the new fields. When the group is finally - # output, these column numbers are used to compute the amount of spaces of - # padding needed for each field. - # - # Programming note: the fields are assumed not to have any tab characters. - # Tabs have been previously removed except for tabs in quoted strings and - # side comments. Tabs in these fields can mess up the column counting. - # The log file warns the user if there are any such tabs. - - my ( - $level, $level_end, - $indentation, $rfields, - $rtokens, $rpatterns, - $is_forced_break, $outdent_long_lines, - $is_terminal_ternary, $is_terminal_statement, - $do_not_pad, $rvertical_tightness_flags, - $level_jump, - ) = @_; - - # number of fields is $jmax - # number of tokens between fields is $jmax-1 - my $jmax = $#{$rfields}; - - my $leading_space_count = get_SPACES($indentation); - - # set outdented flag to be sure we either align within statements or - # across statement boundaries, but not both. - my $is_outdented = $last_leading_space_count > $leading_space_count; - $last_leading_space_count = $leading_space_count; - - # Patch: undo for hanging side comment - my $is_hanging_side_comment = - ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); - $is_outdented = 0 if $is_hanging_side_comment; - - VALIGN_DEBUG_FLAG_APPEND0 && do { - print -"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; - }; - - # Validate cached line if necessary: If we can produce a container - # with just 2 lines total by combining an existing cached opening - # token with the closing token to follow, then we will mark both - # cached flags as valid. - if ($rvertical_tightness_flags) { - if ( $maximum_line_index <= 0 - && $cached_line_type - && $cached_seqno - && $rvertical_tightness_flags->[2] - && $rvertical_tightness_flags->[2] == $cached_seqno ) - { - $rvertical_tightness_flags->[3] ||= 1; - $cached_line_valid ||= 1; - } - } - - # do not join an opening block brace with an unbalanced line - # unless requested with a flag value of 2 - if ( $cached_line_type == 3 - && $maximum_line_index < 0 - && $cached_line_flag < 2 - && $level_jump != 0 ) - { - $cached_line_valid = 0; - } - - # patch until new aligner is finished - if ($do_not_pad) { my_flush() } - - # shouldn't happen: - if ( $level < 0 ) { $level = 0 } - - # do not align code across indentation level changes - # or if vertical alignment is turned off for debugging - if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { - - # we are allowed to shift a group of lines to the right if its - # level is greater than the previous and next group - $extra_indent_ok = - ( $level < $group_level && $last_group_level_written < $group_level ); - - my_flush(); - - # If we know that this line will get flushed out by itself because - # of level changes, we can leave the extra_indent_ok flag set. - # That way, if we get an external flush call, we will still be - # able to do some -lp alignment if necessary. - $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); - - $group_level = $level; - - # wait until after the above flush to get the leading space - # count because it may have been changed if the -icp flag is in - # effect - $leading_space_count = get_SPACES($indentation); - - } - - # -------------------------------------------------------------------- - # Patch to collect outdentable block COMMENTS - # -------------------------------------------------------------------- - my $is_blank_line = ""; - my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); - if ( $group_type eq 'COMMENT' ) { - if ( - ( - $is_block_comment - && $outdent_long_lines - && $leading_space_count == $comment_leading_space_count - ) - || $is_blank_line - ) - { - $group_lines[ ++$maximum_line_index ] = $rfields->[0]; - return; - } - else { - my_flush(); - } - } - - # -------------------------------------------------------------------- - # add dummy fields for terminal ternary - # -------------------------------------------------------------------- - my $j_terminal_match; - if ( $is_terminal_ternary && $current_line ) { - $j_terminal_match = - fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); - $jmax = @{$rfields} - 1; - } - - # -------------------------------------------------------------------- - # add dummy fields for else statement - # -------------------------------------------------------------------- - if ( $rfields->[0] =~ /^else\s*$/ - && $current_line - && $level_jump == 0 ) - { - $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); - $jmax = @{$rfields} - 1; - } - - # -------------------------------------------------------------------- - # Step 1. Handle simple line of code with no fields to match. - # -------------------------------------------------------------------- - if ( $jmax <= 0 ) { - $zero_count++; - - if ( $maximum_line_index >= 0 - && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) - { - - # flush the current group if it has some aligned columns.. - if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } - - # flush current group if we are just collecting side comments.. - elsif ( - - # ...and we haven't seen a comment lately - ( $zero_count > 3 ) - - # ..or if this new line doesn't fit to the left of the comments - || ( ( $leading_space_count + length( $$rfields[0] ) ) > - $group_lines[0]->get_column(0) ) - ) - { - my_flush(); - } - } - - # patch to start new COMMENT group if this comment may be outdented - if ( $is_block_comment - && $outdent_long_lines - && $maximum_line_index < 0 ) - { - $group_type = 'COMMENT'; - $comment_leading_space_count = $leading_space_count; - $group_lines[ ++$maximum_line_index ] = $rfields->[0]; - return; - } - - # just write this line directly if no current group, no side comment, - # and no space recovery is needed. - if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) - { - write_leader_and_string( $leading_space_count, $$rfields[0], 0, - $outdent_long_lines, $rvertical_tightness_flags ); - return; - } - } - else { - $zero_count = 0; - } - - # programming check: (shouldn't happen) - # an error here implies an incorrect call was made - if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { - warning( -"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" - ); - report_definite_bug(); - } - - # -------------------------------------------------------------------- - # create an object to hold this line - # -------------------------------------------------------------------- - my $new_line = new Perl::Tidy::VerticalAligner::Line( - jmax => $jmax, - jmax_original_line => $jmax, - rtokens => $rtokens, - rfields => $rfields, - rpatterns => $rpatterns, - indentation => $indentation, - leading_space_count => $leading_space_count, - outdent_long_lines => $outdent_long_lines, - list_type => "", - is_hanging_side_comment => $is_hanging_side_comment, - maximum_line_length => $rOpts->{'maximum-line-length'}, - rvertical_tightness_flags => $rvertical_tightness_flags, - ); - - # Initialize a global flag saying if the last line of the group should - # match end of group and also terminate the group. There should be no - # returns between here and where the flag is handled at the bottom. - my $col_matching_terminal = 0; - if ( defined($j_terminal_match) ) { - - # remember the column of the terminal ? or { to match with - $col_matching_terminal = $current_line->get_column($j_terminal_match); - - # set global flag for sub decide_if_aligned - $is_matching_terminal_line = 1; - } - - # -------------------------------------------------------------------- - # It simplifies things to create a zero length side comment - # if none exists. - # -------------------------------------------------------------------- - make_side_comment( $new_line, $level_end ); - - # -------------------------------------------------------------------- - # Decide if this is a simple list of items. - # There are 3 list types: none, comma, comma-arrow. - # We use this below to be less restrictive in deciding what to align. - # -------------------------------------------------------------------- - if ($is_forced_break) { - decide_if_list($new_line); - } - - if ($current_line) { - - # -------------------------------------------------------------------- - # Allow hanging side comment to join current group, if any - # This will help keep side comments aligned, because otherwise we - # will have to start a new group, making alignment less likely. - # -------------------------------------------------------------------- - join_hanging_comment( $new_line, $current_line ) - if $is_hanging_side_comment; - - # -------------------------------------------------------------------- - # If there is just one previous line, and it has more fields - # than the new line, try to join fields together to get a match with - # the new line. At the present time, only a single leading '=' is - # allowed to be compressed out. This is useful in rare cases where - # a table is forced to use old breakpoints because of side comments, - # and the table starts out something like this: - # my %MonthChars = ('0', 'Jan', # side comment - # '1', 'Feb', - # '2', 'Mar', - # Eliminating the '=' field will allow the remaining fields to line up. - # This situation does not occur if there are no side comments - # because scan_list would put a break after the opening '('. - # -------------------------------------------------------------------- - eliminate_old_fields( $new_line, $current_line ); - - # -------------------------------------------------------------------- - # If the new line has more fields than the current group, - # see if we can match the first fields and combine the remaining - # fields of the new line. - # -------------------------------------------------------------------- - eliminate_new_fields( $new_line, $current_line ); - - # -------------------------------------------------------------------- - # Flush previous group unless all common tokens and patterns match.. - # -------------------------------------------------------------------- - check_match( $new_line, $current_line ); - - # -------------------------------------------------------------------- - # See if there is space for this line in the current group (if any) - # -------------------------------------------------------------------- - if ($current_line) { - check_fit( $new_line, $current_line ); - } - } - - # -------------------------------------------------------------------- - # Append this line to the current group (or start new group) - # -------------------------------------------------------------------- - accept_line($new_line); - - # Future update to allow this to vary: - $current_line = $new_line if ( $maximum_line_index == 0 ); - - # output this group if it ends in a terminal else or ternary line - if ( defined($j_terminal_match) ) { - - # if there is only one line in the group (maybe due to failure to match - # perfectly with previous lines), then align the ? or { of this - # terminal line with the previous one unless that would make the line - # too long - if ( $maximum_line_index == 0 ) { - my $col_now = $current_line->get_column($j_terminal_match); - my $pad = $col_matching_terminal - $col_now; - my $padding_available = - $current_line->get_available_space_on_right(); - if ( $pad > 0 && $pad <= $padding_available ) { - $current_line->increase_field_width( $j_terminal_match, $pad ); - } - } - my_flush(); - $is_matching_terminal_line = 0; - } - - # -------------------------------------------------------------------- - # Step 8. Some old debugging stuff - # -------------------------------------------------------------------- - VALIGN_DEBUG_FLAG_APPEND && do { - print "APPEND fields:"; - dump_array(@$rfields); - print "APPEND tokens:"; - dump_array(@$rtokens); - print "APPEND patterns:"; - dump_array(@$rpatterns); - dump_alignments(); - }; - - return; -} - -sub join_hanging_comment { - - my $line = shift; - my $jmax = $line->get_jmax(); - return 0 unless $jmax == 1; # must be 2 fields - my $rtokens = $line->get_rtokens(); - return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. - my $rfields = $line->get_rfields(); - return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... - my $old_line = shift; - my $maximum_field_index = $old_line->get_jmax(); - return 0 - unless $maximum_field_index > $jmax; # the current line has more fields - my $rpatterns = $line->get_rpatterns(); - - $line->set_is_hanging_side_comment(1); - $jmax = $maximum_field_index; - $line->set_jmax($jmax); - $$rfields[$jmax] = $$rfields[1]; - $$rtokens[ $jmax - 1 ] = $$rtokens[0]; - $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; - for ( my $j = 1 ; $j < $jmax ; $j++ ) { - $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? - $$rtokens[ $j - 1 ] = ""; - $$rpatterns[ $j - 1 ] = ""; - } - return 1; -} - -sub eliminate_old_fields { - - my $new_line = shift; - my $jmax = $new_line->get_jmax(); - if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } - if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } - - # there must be one previous line - return unless ( $maximum_line_index == 0 ); - - my $old_line = shift; - my $maximum_field_index = $old_line->get_jmax(); - - ############################################### - # this line must have fewer fields - return unless $maximum_field_index > $jmax; - ############################################### - - # Identify specific cases where field elimination is allowed: - # case=1: both lines have comma-separated lists, and the first - # line has an equals - # case=2: both lines have leading equals - - # case 1 is the default - my $case = 1; - - # See if case 2: both lines have leading '=' - # We'll require smiliar leading patterns in this case - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $rpatterns = $new_line->get_rpatterns(); - my $old_rpatterns = $old_line->get_rpatterns(); - if ( $rtokens->[0] =~ /^=\d*$/ - && $old_rtokens->[0] eq $rtokens->[0] - && $old_rpatterns->[0] eq $rpatterns->[0] ) - { - $case = 2; - } - - # not too many fewer fields in new line for case 1 - return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); - - # case 1 must have side comment - my $old_rfields = $old_line->get_rfields(); - return - if ( $case == 1 - && length( $$old_rfields[$maximum_field_index] ) == 0 ); - - my $rfields = $new_line->get_rfields(); - - my $hid_equals = 0; - - my @new_alignments = (); - my @new_fields = (); - my @new_matching_patterns = (); - my @new_matching_tokens = (); - - my $j = 0; - my $k; - my $current_field = ''; - my $current_pattern = ''; - - # loop over all old tokens - my $in_match = 0; - for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { - $current_field .= $$old_rfields[$k]; - $current_pattern .= $$old_rpatterns[$k]; - last if ( $j > $jmax - 1 ); - - if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { - $in_match = 1; - $new_fields[$j] = $current_field; - $new_matching_patterns[$j] = $current_pattern; - $current_field = ''; - $current_pattern = ''; - $new_matching_tokens[$j] = $$old_rtokens[$k]; - $new_alignments[$j] = $old_line->get_alignment($k); - $j++; - } - else { - - if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { - last if ( $case == 2 ); # avoid problems with stuff - # like: $a=$b=$c=$d; - $hid_equals = 1; - } - last - if ( $in_match && $case == 1 ) - ; # disallow gaps in matching field types in case 1 - } - } - - # Modify the current state if we are successful. - # We must exactly reach the ends of both lists for success. - if ( ( $j == $jmax ) - && ( $current_field eq '' ) - && ( $case != 1 || $hid_equals ) ) - { - $k = $maximum_field_index; - $current_field .= $$old_rfields[$k]; - $current_pattern .= $$old_rpatterns[$k]; - $new_fields[$j] = $current_field; - $new_matching_patterns[$j] = $current_pattern; - - $new_alignments[$j] = $old_line->get_alignment($k); - $maximum_field_index = $j; - - $old_line->set_alignments(@new_alignments); - $old_line->set_jmax($jmax); - $old_line->set_rtokens( \@new_matching_tokens ); - $old_line->set_rfields( \@new_fields ); - $old_line->set_rpatterns( \@$rpatterns ); - } -} - -# create an empty side comment if none exists -sub make_side_comment { - my $new_line = shift; - my $level_end = shift; - my $jmax = $new_line->get_jmax(); - my $rtokens = $new_line->get_rtokens(); - - # if line does not have a side comment... - if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - $$rtokens[$jmax] = '#'; - $$rfields[ ++$jmax ] = ''; - $$rpatterns[$jmax] = '#'; - $new_line->set_jmax($jmax); - $new_line->set_jmax_original_line($jmax); - } - - # line has a side comment.. - else { - - # don't remember old side comment location for very long - my $line_number = $vertical_aligner_self->get_output_line_number(); - my $rfields = $new_line->get_rfields(); - if ( - $line_number - $last_side_comment_line_number > 12 - - # and don't remember comment location across block level changes - || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ ) - ) - { - forget_side_comment(); - } - $last_side_comment_line_number = $line_number; - $last_side_comment_level = $level_end; - } -} - -sub decide_if_list { - - my $line = shift; - - # A list will be taken to be a line with a forced break in which all - # of the field separators are commas or comma-arrows (except for the - # trailing #) - - # List separator tokens are things like ',3' or '=>2', - # where the trailing digit is the nesting depth. Allow braces - # to allow nested list items. - my $rtokens = $line->get_rtokens(); - my $test_token = $$rtokens[0]; - if ( $test_token =~ /^(\,|=>)/ ) { - my $list_type = $test_token; - my $jmax = $line->get_jmax(); - - foreach ( 1 .. $jmax - 2 ) { - if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) { - $list_type = ""; - last; - } - } - $line->set_list_type($list_type); - } -} - -sub eliminate_new_fields { - - return unless ( $maximum_line_index >= 0 ); - my ( $new_line, $old_line ) = @_; - my $jmax = $new_line->get_jmax(); - - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $is_assignment = - ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); - - # must be monotonic variation - return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); - - # must be more fields in the new line - my $maximum_field_index = $old_line->get_jmax(); - return unless ( $maximum_field_index < $jmax ); - - unless ($is_assignment) { - return - unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) - ; # only if monotonic - - # never combine fields of a comma list - return - unless ( $maximum_field_index > 1 ) - && ( $new_line->get_list_type() !~ /^,/ ); - } - - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $old_rpatterns = $old_line->get_rpatterns(); - - # loop over all OLD tokens except comment and check match - my $match = 1; - my $k; - for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { - if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) - || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) - { - $match = 0; - last; - } - } - - # first tokens agree, so combine extra new tokens - if ($match) { - for $k ( $maximum_field_index .. $jmax - 1 ) { - - $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; - $$rfields[$k] = ""; - $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; - $$rpatterns[$k] = ""; - } - - $$rtokens[ $maximum_field_index - 1 ] = '#'; - $$rfields[$maximum_field_index] = $$rfields[$jmax]; - $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; - $jmax = $maximum_field_index; - } - $new_line->set_jmax($jmax); -} - -sub fix_terminal_ternary { - - # Add empty fields as necessary to align a ternary term - # like this: - # - # my $leapyear = - # $year % 4 ? 0 - # : $year % 100 ? 1 - # : $year % 400 ? 0 - # : 1; - # - # returns 1 if the terminal item should be indented - - my ( $rfields, $rtokens, $rpatterns ) = @_; - - my $jmax = @{$rfields} - 1; - my $old_line = $group_lines[$maximum_line_index]; - my $rfields_old = $old_line->get_rfields(); - - my $rpatterns_old = $old_line->get_rpatterns(); - my $rtokens_old = $old_line->get_rtokens(); - my $maximum_field_index = $old_line->get_jmax(); - - # look for the question mark after the : - my ($jquestion); - my $depth_question; - my $pad = ""; - for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) { - my $tok = $rtokens_old->[$j]; - if ( $tok =~ /^\?(\d+)$/ ) { - $depth_question = $1; - - # depth must be correct - next unless ( $depth_question eq $group_level ); - - $jquestion = $j; - if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { - $pad = " " x length($1); - } - else { - return; # shouldn't happen - } - last; - } - } - return unless ( defined($jquestion) ); # shouldn't happen - - # Now splice the tokens and patterns of the previous line - # into the else line to insure a match. Add empty fields - # as necessary. - my $jadd = $jquestion; - - # Work on copies of the actual arrays in case we have - # to return due to an error - my @fields = @{$rfields}; - my @patterns = @{$rpatterns}; - my @tokens = @{$rtokens}; - - VALIGN_DEBUG_FLAG_TERNARY && do { - local $" = '><'; - print "CURRENT FIELDS=<@{$rfields_old}>\n"; - print "CURRENT TOKENS=<@{$rtokens_old}>\n"; - print "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; - print "UNMODIFIED FIELDS=<@{$rfields}>\n"; - print "UNMODIFIED TOKENS=<@{$rtokens}>\n"; - print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; - }; - - # handle cases of leading colon on this line - if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { - - my ( $colon, $therest ) = ( $1, $2 ); - - # Handle sub-case of first field with leading colon plus additional code - # This is the usual situation as at the '1' below: - # ... - # : $year % 400 ? 0 - # : 1; - if ($therest) { - - # Split the first field after the leading colon and insert padding. - # Note that this padding will remain even if the terminal value goes - # out on a separate line. This does not seem to look to bad, so no - # mechanism has been included to undo it. - my $field1 = shift @fields; - unshift @fields, ( $colon, $pad . $therest ); - - # change the leading pattern from : to ? - return unless ( $patterns[0] =~ s/^\:/?/ ); - - # install leading tokens and patterns of existing line - unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); - unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); - - # insert appropriate number of empty fields - splice( @fields, 1, 0, ('') x $jadd ) if $jadd; - } - - # handle sub-case of first field just equal to leading colon. - # This can happen for example in the example below where - # the leading '(' would create a new alignment token - # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) - # : ( $mname = $name . '->' ); - else { - - return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen - - # prepend a leading ? onto the second pattern - $patterns[1] = "?b" . $patterns[1]; - - # pad the second field - $fields[1] = $pad . $fields[1]; - - # install leading tokens and patterns of existing line, replacing - # leading token and inserting appropriate number of empty fields - splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); - splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); - splice( @fields, 1, 0, ('') x $jadd ) if $jadd; - } - } - - # Handle case of no leading colon on this line. This will - # be the case when -wba=':' is used. For example, - # $year % 400 ? 0 : - # 1; - else { - - # install leading tokens and patterns of existing line - $patterns[0] = '?' . 'b' . $patterns[0]; - unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); - unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); - - # insert appropriate number of empty fields - $jadd = $jquestion + 1; - $fields[0] = $pad . $fields[0]; - splice( @fields, 0, 0, ('') x $jadd ) if $jadd; - } - - VALIGN_DEBUG_FLAG_TERNARY && do { - local $" = '><'; - print "MODIFIED TOKENS=<@tokens>\n"; - print "MODIFIED PATTERNS=<@patterns>\n"; - print "MODIFIED FIELDS=<@fields>\n"; - }; - - # all ok .. update the arrays - @{$rfields} = @fields; - @{$rtokens} = @tokens; - @{$rpatterns} = @patterns; - - # force a flush after this line - return $jquestion; -} - -sub fix_terminal_else { - - # Add empty fields as necessary to align a balanced terminal - # else block to a previous if/elsif/unless block, - # like this: - # - # if ( 1 || $x ) { print "ok 13\n"; } - # else { print "not ok 13\n"; } - # - # returns 1 if the else block should be indented - # - my ( $rfields, $rtokens, $rpatterns ) = @_; - my $jmax = @{$rfields} - 1; - return unless ( $jmax > 0 ); - - # check for balanced else block following if/elsif/unless - my $rfields_old = $current_line->get_rfields(); - - # TBD: add handling for 'case' - return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); - - # look for the opening brace after the else, and extrace the depth - my $tok_brace = $rtokens->[0]; - my $depth_brace; - if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } - - # probably: "else # side_comment" - else { return } - - my $rpatterns_old = $current_line->get_rpatterns(); - my $rtokens_old = $current_line->get_rtokens(); - my $maximum_field_index = $current_line->get_jmax(); - - # be sure the previous if/elsif is followed by an opening paren - my $jparen = 0; - my $tok_paren = '(' . $depth_brace; - my $tok_test = $rtokens_old->[$jparen]; - return unless ( $tok_test eq $tok_paren ); # shouldn't happen - - # Now find the opening block brace - my ($jbrace); - for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) { - my $tok = $rtokens_old->[$j]; - if ( $tok eq $tok_brace ) { - $jbrace = $j; - last; - } - } - return unless ( defined($jbrace) ); # shouldn't happen - - # Now splice the tokens and patterns of the previous line - # into the else line to insure a match. Add empty fields - # as necessary. - my $jadd = $jbrace - $jparen; - splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); - splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); - splice( @{$rfields}, 1, 0, ('') x $jadd ); - - # force a flush after this line if it does not follow a case - return $jbrace - unless ( $rfields_old->[0] =~ /^case\s*$/ ); -} - -{ # sub check_match - my %is_good_alignment; - - BEGIN { - - # Vertically aligning on certain "good" tokens is usually okay - # so we can be less restrictive in marginal cases. - @_ = qw( { ? => = ); - push @_, (','); - @is_good_alignment{@_} = (1) x scalar(@_); - } - - sub check_match { - - # See if the current line matches the current vertical alignment group. - # If not, flush the current group. - my $new_line = shift; - my $old_line = shift; - - # uses global variables: - # $previous_minimum_jmax_seen - # $maximum_jmax_seen - # $maximum_line_index - # $marginal_match - my $jmax = $new_line->get_jmax(); - my $maximum_field_index = $old_line->get_jmax(); - - # flush if this line has too many fields - if ( $jmax > $maximum_field_index ) { goto NO_MATCH } - - # flush if adding this line would make a non-monotonic field count - if ( - ( $maximum_field_index > $jmax ) # this has too few fields - && ( - ( $previous_minimum_jmax_seen < - $jmax ) # and wouldn't be monotonic - || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) - ) - ) - { - goto NO_MATCH; - } - - # otherwise see if this line matches the current group - my $jmax_original_line = $new_line->get_jmax_original_line(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $list_type = $new_line->get_list_type(); - - my $group_list_type = $old_line->get_list_type(); - my $old_rpatterns = $old_line->get_rpatterns(); - my $old_rtokens = $old_line->get_rtokens(); - - my $jlimit = $jmax - 1; - if ( $maximum_field_index > $jmax ) { - $jlimit = $jmax_original_line; - --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); - } - - # handle comma-separated lists .. - if ( $group_list_type && ( $list_type eq $group_list_type ) ) { - for my $j ( 0 .. $jlimit ) { - my $old_tok = $$old_rtokens[$j]; - next unless $old_tok; - my $new_tok = $$rtokens[$j]; - next unless $new_tok; - - # lists always match ... - # unless they would align any '=>'s with ','s - goto NO_MATCH - if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ - || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); - } - } - - # do detailed check for everything else except hanging side comments - elsif ( !$is_hanging_side_comment ) { - - my $leading_space_count = $new_line->get_leading_space_count(); - - my $max_pad = 0; - my $min_pad = 0; - my $saw_good_alignment; - - for my $j ( 0 .. $jlimit ) { - - my $old_tok = $$old_rtokens[$j]; - my $new_tok = $$rtokens[$j]; - - # Note on encoding used for alignment tokens: - # ------------------------------------------- - # Tokens are "decorated" with information which can help - # prevent unwanted alignments. Consider for example the - # following two lines: - # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); - # local ( $i, $f ) = &'bdiv( $xn, $xd ); - # There are three alignment tokens in each line, a comma, - # an =, and a comma. In the first line these three tokens - # are encoded as: - # ,4+local-18 =3 ,4+split-7 - # and in the second line they are encoded as - # ,4+local-18 =3 ,4+&'bdiv-8 - # Tokens always at least have token name and nesting - # depth. So in this example the ='s are at depth 3 and - # the ,'s are at depth 4. This prevents aligning tokens - # of different depths. Commas contain additional - # information, as follows: - # , {depth} + {container name} - {spaces to opening paren} - # This allows us to reject matching the rightmost commas - # in the above two lines, since they are for different - # function calls. This encoding is done in - # 'sub send_lines_to_vertical_aligner'. - - # Pick off actual token. - # Everything up to the first digit is the actual token. - my $alignment_token = $new_tok; - if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } - - # see if the decorated tokens match - my $tokens_match = $new_tok eq $old_tok - - # Exception for matching terminal : of ternary statement.. - # consider containers prefixed by ? and : a match - || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); - - # No match if the alignment tokens differ... - if ( !$tokens_match ) { - - # ...Unless this is a side comment - if ( - $j == $jlimit - - # and there is either at least one alignment token - # or this is a single item following a list. This - # latter rule is required for 'December' to join - # the following list: - # my (@months) = ( - # '', 'January', 'February', 'March', - # 'April', 'May', 'June', 'July', - # 'August', 'September', 'October', 'November', - # 'December' - # ); - # If it doesn't then the -lp formatting will fail. - && ( $j > 0 || $old_tok =~ /^,/ ) - ) - { - $marginal_match = 1 - if ( $marginal_match == 0 - && $maximum_line_index == 0 ); - last; - } - - goto NO_MATCH; - } - - # Calculate amount of padding required to fit this in. - # $pad is the number of spaces by which we must increase - # the current field to squeeze in this field. - my $pad = - length( $$rfields[$j] ) - $old_line->current_field_width($j); - if ( $j == 0 ) { $pad += $leading_space_count; } - - # remember max pads to limit marginal cases - if ( $alignment_token ne '#' ) { - if ( $pad > $max_pad ) { $max_pad = $pad } - if ( $pad < $min_pad ) { $min_pad = $pad } - } - if ( $is_good_alignment{$alignment_token} ) { - $saw_good_alignment = 1; - } - - # If patterns don't match, we have to be careful... - if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { - - # flag this as a marginal match since patterns differ - $marginal_match = 1 - if ( $marginal_match == 0 && $maximum_line_index == 0 ); - - # We have to be very careful about aligning commas - # when the pattern's don't match, because it can be - # worse to create an alignment where none is needed - # than to omit one. Here's an example where the ','s - # are not in named continers. The first line below - # should not match the next two: - # ( $a, $b ) = ( $b, $r ); - # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); - # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); - if ( $alignment_token eq ',' ) { - - # do not align commas unless they are in named containers - goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); - } - - # do not align parens unless patterns match; - # large ugly spaces can occur in math expressions. - elsif ( $alignment_token eq '(' ) { - - # But we can allow a match if the parens don't - # require any padding. - if ( $pad != 0 ) { goto NO_MATCH } - } - - # Handle an '=' alignment with different patterns to - # the left. - elsif ( $alignment_token eq '=' ) { - - # It is best to be a little restrictive when - # aligning '=' tokens. Here is an example of - # two lines that we will not align: - # my $variable=6; - # $bb=4; - # The problem is that one is a 'my' declaration, - # and the other isn't, so they're not very similar. - # We will filter these out by comparing the first - # letter of the pattern. This is crude, but works - # well enough. - if ( - substr( $$old_rpatterns[$j], 0, 1 ) ne - substr( $$rpatterns[$j], 0, 1 ) ) - { - goto NO_MATCH; - } - - # If we pass that test, we'll call it a marginal match. - # Here is an example of a marginal match: - # $done{$$op} = 1; - # $op = compile_bblock($op); - # The left tokens are both identifiers, but - # one accesses a hash and the other doesn't. - # We'll let this be a tentative match and undo - # it later if we don't find more than 2 lines - # in the group. - elsif ( $maximum_line_index == 0 ) { - $marginal_match = - 2; # =2 prevents being undone below - } - } - } - - # Don't let line with fewer fields increase column widths - # ( align3.t ) - if ( $maximum_field_index > $jmax ) { - - # Exception: suspend this rule to allow last lines to join - if ( $pad > 0 ) { goto NO_MATCH; } - } - } ## end for my $j ( 0 .. $jlimit) - - # Turn off the "marginal match" flag in some cases... - # A "marginal match" occurs when the alignment tokens agree - # but there are differences in the other tokens (patterns). - # If we leave the marginal match flag set, then the rule is that we - # will align only if there are more than two lines in the group. - # We will turn of the flag if we almost have a match - # and either we have seen a good alignment token or we - # just need a small pad (2 spaces) to fit. These rules are - # the result of experimentation. Tokens which misaligned by just - # one or two characters are annoying. On the other hand, - # large gaps to less important alignment tokens are also annoying. - if ( $marginal_match == 1 - && $jmax == $maximum_field_index - && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) - ) - { - $marginal_match = 0; - } - ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; - } - - # We have a match (even if marginal). - # If the current line has fewer fields than the current group - # but otherwise matches, copy the remaining group fields to - # make it a perfect match. - if ( $maximum_field_index > $jmax ) { - my $comment = $$rfields[$jmax]; - for $jmax ( $jlimit .. $maximum_field_index ) { - $$rtokens[$jmax] = $$old_rtokens[$jmax]; - $$rfields[ ++$jmax ] = ''; - $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; - } - $$rfields[$jmax] = $comment; - $new_line->set_jmax($jmax); - } - return; - - NO_MATCH: - ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n"; - my_flush(); - return; - } -} - -sub check_fit { - - return unless ( $maximum_line_index >= 0 ); - my $new_line = shift; - my $old_line = shift; - - my $jmax = $new_line->get_jmax(); - my $leading_space_count = $new_line->get_leading_space_count(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - - my $group_list_type = $group_lines[0]->get_list_type(); - - my $padding_so_far = 0; - my $padding_available = $old_line->get_available_space_on_right(); - - # save current columns in case this doesn't work - save_alignment_columns(); - - my ( $j, $pad, $eight ); - my $maximum_field_index = $old_line->get_jmax(); - for $j ( 0 .. $jmax ) { - - $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); - - if ( $j == 0 ) { - $pad += $leading_space_count; - } - - # remember largest gap of the group, excluding gap to side comment - if ( $pad < 0 - && $group_maximum_gap < -$pad - && $j > 0 - && $j < $jmax - 1 ) - { - $group_maximum_gap = -$pad; - } - - next if $pad < 0; - - ## This patch helps sometimes, but it doesn't check to see if - ## the line is too long even without the side comment. It needs - ## to be reworked. - ##don't let a long token with no trailing side comment push - ##side comments out, or end a group. (sidecmt1.t) - ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0); - - # This line will need space; lets see if we want to accept it.. - if ( - - # not if this won't fit - ( $pad > $padding_available ) - - # previously, there were upper bounds placed on padding here - # (maximum_whitespace_columns), but they were not really helpful - - ) - { - - # revert to starting state then flush; things didn't work out - restore_alignment_columns(); - my_flush(); - last; - } - - # patch to avoid excessive gaps in previous lines, - # due to a line of fewer fields. - # return join( ".", - # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, - # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); - next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); - - # looks ok, squeeze this field in - $old_line->increase_field_width( $j, $pad ); - $padding_available -= $pad; - - # remember largest gap of the group, excluding gap to side comment - if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { - $group_maximum_gap = $pad; - } - } -} - -sub accept_line { - - # The current line either starts a new alignment group or is - # accepted into the current alignment group. - my $new_line = shift; - $group_lines[ ++$maximum_line_index ] = $new_line; - - # initialize field lengths if starting new group - if ( $maximum_line_index == 0 ) { - - my $jmax = $new_line->get_jmax(); - my $rfields = $new_line->get_rfields(); - my $rtokens = $new_line->get_rtokens(); - my $j; - my $col = $new_line->get_leading_space_count(); - - for $j ( 0 .. $jmax ) { - $col += length( $$rfields[$j] ); - - # create initial alignments for the new group - my $token = ""; - if ( $j < $jmax ) { $token = $$rtokens[$j] } - my $alignment = make_alignment( $col, $token ); - $new_line->set_alignment( $j, $alignment ); - } - - $maximum_jmax_seen = $jmax; - $minimum_jmax_seen = $jmax; - } - - # use previous alignments otherwise - else { - my @new_alignments = - $group_lines[ $maximum_line_index - 1 ]->get_alignments(); - $new_line->set_alignments(@new_alignments); - } - - # remember group jmax extremes for next call to append_line - $previous_minimum_jmax_seen = $minimum_jmax_seen; - $previous_maximum_jmax_seen = $maximum_jmax_seen; -} - -sub dump_array { - - # debug routine to dump array contents - local $" = ')('; - print "(@_)\n"; -} - -# flush() sends the current Perl::Tidy::VerticalAligner group down the -# pipeline to Perl::Tidy::FileWriter. - -# This is the external flush, which also empties the cache -sub flush { - - if ( $maximum_line_index < 0 ) { - if ($cached_line_type) { - $seqno_string = $cached_seqno_string; - entab_and_output( $cached_line_text, - $cached_line_leading_space_count, - $last_group_level_written ); - $cached_line_type = 0; - $cached_line_text = ""; - $cached_seqno_string = ""; - } - } - else { - my_flush(); - } -} - -# This is the internal flush, which leaves the cache intact -sub my_flush { - - return if ( $maximum_line_index < 0 ); - - # handle a group of comment lines - if ( $group_type eq 'COMMENT' ) { - - VALIGN_DEBUG_FLAG_APPEND0 && do { - my ( $a, $b, $c ) = caller(); - print -"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; - - }; - my $leading_space_count = $comment_leading_space_count; - my $leading_string = get_leading_string($leading_space_count); - - # zero leading space count if any lines are too long - my $max_excess = 0; - for my $i ( 0 .. $maximum_line_index ) { - my $str = $group_lines[$i]; - my $excess = - length($str) + $leading_space_count - $rOpts_maximum_line_length; - if ( $excess > $max_excess ) { - $max_excess = $excess; - } - } - - if ( $max_excess > 0 ) { - $leading_space_count -= $max_excess; - if ( $leading_space_count < 0 ) { $leading_space_count = 0 } - $last_outdented_line_at = - $file_writer_object->get_output_line_number(); - unless ($outdented_line_count) { - $first_outdented_line_at = $last_outdented_line_at; - } - $outdented_line_count += ( $maximum_line_index + 1 ); - } - - # write the group of lines - my $outdent_long_lines = 0; - for my $i ( 0 .. $maximum_line_index ) { - write_leader_and_string( $leading_space_count, $group_lines[$i], 0, - $outdent_long_lines, "" ); - } - } - - # handle a group of code lines - else { - - VALIGN_DEBUG_FLAG_APPEND0 && do { - my $group_list_type = $group_lines[0]->get_list_type(); - my ( $a, $b, $c ) = caller(); - my $maximum_field_index = $group_lines[0]->get_jmax(); - print -"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; - - }; - - # some small groups are best left unaligned - my $do_not_align = decide_if_aligned(); - - # optimize side comment location - $do_not_align = adjust_side_comment($do_not_align); - - # recover spaces for -lp option if possible - my $extra_leading_spaces = get_extra_leading_spaces(); - - # all lines of this group have the same basic leading spacing - my $group_leader_length = $group_lines[0]->get_leading_space_count(); - - # add extra leading spaces if helpful - my $min_ci_gap = improve_continuation_indentation( $do_not_align, - $group_leader_length ); - - # loop to output all lines - for my $i ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$i]; - write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align, - $group_leader_length, $extra_leading_spaces ); - } - } - initialize_for_new_group(); -} - -sub decide_if_aligned { - - # Do not try to align two lines which are not really similar - return unless $maximum_line_index == 1; - return if ($is_matching_terminal_line); - - my $group_list_type = $group_lines[0]->get_list_type(); - - my $do_not_align = ( - - # always align lists - !$group_list_type - - && ( - - # don't align if it was just a marginal match - $marginal_match - - # don't align two lines with big gap - || $group_maximum_gap > 12 - - # or lines with differing number of alignment tokens - # TODO: this could be improved. It occasionally rejects - # good matches. - || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen - ) - ); - - # But try to convert them into a simple comment group if the first line - # a has side comment - my $rfields = $group_lines[0]->get_rfields(); - my $maximum_field_index = $group_lines[0]->get_jmax(); - if ( $do_not_align - && ( $maximum_line_index > 0 ) - && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) - { - combine_fields(); - $do_not_align = 0; - } - return $do_not_align; -} - -sub adjust_side_comment { - - my $do_not_align = shift; - - # let's see if we can move the side comment field out a little - # to improve readability (the last field is always a side comment field) - my $have_side_comment = 0; - my $first_side_comment_line = -1; - my $maximum_field_index = $group_lines[0]->get_jmax(); - for my $i ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$i]; - - if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { - $have_side_comment = 1; - $first_side_comment_line = $i; - last; - } - } - - my $kmax = $maximum_field_index + 1; - - if ($have_side_comment) { - - my $line = $group_lines[0]; - - # the maximum space without exceeding the line length: - my $avail = $line->get_available_space_on_right(); - - # try to use the previous comment column - my $side_comment_column = $line->get_column( $kmax - 2 ); - my $move = $last_comment_column - $side_comment_column; - -## my $sc_line0 = $side_comment_history[0]->[0]; -## my $sc_col0 = $side_comment_history[0]->[1]; -## my $sc_line1 = $side_comment_history[1]->[0]; -## my $sc_col1 = $side_comment_history[1]->[1]; -## my $sc_line2 = $side_comment_history[2]->[0]; -## my $sc_col2 = $side_comment_history[2]->[1]; -## -## # FUTURE UPDATES: -## # Be sure to ignore 'do not align' and '} # end comments' -## # Find first $move > 0 and $move <= $avail as follows: -## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 -## # 2. try sc_col2 if (line-sc_line2) < 12 -## # 3. try min possible space, plus up to 8, -## # 4. try min possible space - - if ( $kmax > 0 && !$do_not_align ) { - - # but if this doesn't work, give up and use the minimum space - if ( $move > $avail ) { - $move = $rOpts_minimum_space_to_comment - 1; - } - - # but we want some minimum space to the comment - my $min_move = $rOpts_minimum_space_to_comment - 1; - if ( $move >= 0 - && $last_side_comment_length > 0 - && ( $first_side_comment_line == 0 ) - && $group_level == $last_group_level_written ) - { - $min_move = 0; - } - - if ( $move < $min_move ) { - $move = $min_move; - } - - # prevously, an upper bound was placed on $move here, - # (maximum_space_to_comment), but it was not helpful - - # don't exceed the available space - if ( $move > $avail ) { $move = $avail } - - # we can only increase space, never decrease - if ( $move > 0 ) { - $line->increase_field_width( $maximum_field_index - 1, $move ); - } - - # remember this column for the next group - $last_comment_column = $line->get_column( $kmax - 2 ); - } - else { - - # try to at least line up the existing side comment location - if ( $kmax > 0 && $move > 0 && $move < $avail ) { - $line->increase_field_width( $maximum_field_index - 1, $move ); - $do_not_align = 0; - } - - # reset side comment column if we can't align - else { - forget_side_comment(); - } - } - } - return $do_not_align; -} - -sub improve_continuation_indentation { - my ( $do_not_align, $group_leader_length ) = @_; - - # See if we can increase the continuation indentation - # to move all continuation lines closer to the next field - # (unless it is a comment). - # - # '$min_ci_gap'is the extra indentation that we may need to introduce. - # We will only introduce this to fields which already have some ci. - # Without this variable, we would occasionally get something like this - # (Complex.pm): - # - # use overload '+' => \&plus, - # '-' => \&minus, - # '*' => \&multiply, - # ... - # 'tan' => \&tan, - # 'atan2' => \&atan2, - # - # Whereas with this variable, we can shift variables over to get this: - # - # use overload '+' => \&plus, - # '-' => \&minus, - # '*' => \&multiply, - # ... - # 'tan' => \&tan, - # 'atan2' => \&atan2, - - ## BUB: Deactivated#################### - # The trouble with this patch is that it may, for example, - # move in some 'or's or ':'s, and leave some out, so that the - # left edge alignment suffers. - return 0; - ########################################### - - my $maximum_field_index = $group_lines[0]->get_jmax(); - - my $min_ci_gap = $rOpts_maximum_line_length; - if ( $maximum_field_index > 1 && !$do_not_align ) { - - for my $i ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$i]; - my $leading_space_count = $line->get_leading_space_count(); - my $rfields = $line->get_rfields(); - - my $gap = - $line->get_column(0) - - $leading_space_count - - length( $$rfields[0] ); - - if ( $leading_space_count > $group_leader_length ) { - if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } - } - } - - if ( $min_ci_gap >= $rOpts_maximum_line_length ) { - $min_ci_gap = 0; - } - } - else { - $min_ci_gap = 0; - } - return $min_ci_gap; -} - -sub write_vertically_aligned_line { - - my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, - $extra_leading_spaces ) - = @_; - my $rfields = $line->get_rfields(); - my $leading_space_count = $line->get_leading_space_count(); - my $outdent_long_lines = $line->get_outdent_long_lines(); - my $maximum_field_index = $line->get_jmax(); - my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); - - # add any extra spaces - if ( $leading_space_count > $group_leader_length ) { - $leading_space_count += $min_ci_gap; - } - - my $str = $$rfields[0]; - - # loop to concatenate all fields of this line and needed padding - my $total_pad_count = 0; - my ( $j, $pad ); - for $j ( 1 .. $maximum_field_index ) { - - # skip zero-length side comments - last - if ( ( $j == $maximum_field_index ) - && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) - ); - - # compute spaces of padding before this field - my $col = $line->get_column( $j - 1 ); - $pad = $col - ( length($str) + $leading_space_count ); - - if ($do_not_align) { - $pad = - ( $j < $maximum_field_index ) - ? 0 - : $rOpts_minimum_space_to_comment - 1; - } - - # if the -fpsc flag is set, move the side comment to the selected - # column if and only if it is possible, ignoring constraints on - # line length and minimum space to comment - if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) - { - my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; - if ( $newpad >= 0 ) { $pad = $newpad; } - } - - # accumulate the padding - if ( $pad > 0 ) { $total_pad_count += $pad; } - - # add this field - if ( !defined $$rfields[$j] ) { - write_diagnostics("UNDEFined field at j=$j\n"); - } - - # only add padding when we have a finite field; - # this avoids extra terminal spaces if we have empty fields - if ( length( $$rfields[$j] ) > 0 ) { - $str .= ' ' x $total_pad_count; - $total_pad_count = 0; - $str .= $$rfields[$j]; - } - else { - $total_pad_count = 0; - } - - # update side comment history buffer - if ( $j == $maximum_field_index ) { - my $lineno = $file_writer_object->get_output_line_number(); - shift @side_comment_history; - push @side_comment_history, [ $lineno, $col ]; - } - } - - my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); - - # ship this line off - write_leader_and_string( $leading_space_count + $extra_leading_spaces, - $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags ); -} - -sub get_extra_leading_spaces { - - #---------------------------------------------------------- - # Define any extra indentation space (for the -lp option). - # Here is why: - # If a list has side comments, sub scan_list must dump the - # list before it sees everything. When this happens, it sets - # the indentation to the standard scheme, but notes how - # many spaces it would have liked to use. We may be able - # to recover that space here in the event that that all of the - # lines of a list are back together again. - #---------------------------------------------------------- - - my $extra_leading_spaces = 0; - if ($extra_indent_ok) { - my $object = $group_lines[0]->get_indentation(); - if ( ref($object) ) { - my $extra_indentation_spaces_wanted = - get_RECOVERABLE_SPACES($object); - - # all indentation objects must be the same - my $i; - for $i ( 1 .. $maximum_line_index ) { - if ( $object != $group_lines[$i]->get_indentation() ) { - $extra_indentation_spaces_wanted = 0; - last; - } - } - - if ($extra_indentation_spaces_wanted) { - - # the maximum space without exceeding the line length: - my $avail = $group_lines[0]->get_available_space_on_right(); - $extra_leading_spaces = - ( $avail > $extra_indentation_spaces_wanted ) - ? $extra_indentation_spaces_wanted - : $avail; - - # update the indentation object because with -icp the terminal - # ');' will use the same adjustment. - $object->permanently_decrease_AVAILABLE_SPACES( - -$extra_leading_spaces ); - } - } - } - return $extra_leading_spaces; -} - -sub combine_fields { - - # combine all fields except for the comment field ( sidecmt.t ) - # Uses global variables: - # @group_lines - # $maximum_line_index - my ( $j, $k ); - my $maximum_field_index = $group_lines[0]->get_jmax(); - for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { - my $line = $group_lines[$j]; - my $rfields = $line->get_rfields(); - foreach ( 1 .. $maximum_field_index - 1 ) { - $$rfields[0] .= $$rfields[$_]; - } - $$rfields[1] = $$rfields[$maximum_field_index]; - - $line->set_jmax(1); - $line->set_column( 0, 0 ); - $line->set_column( 1, 0 ); - - } - $maximum_field_index = 1; - - for $j ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$j]; - my $rfields = $line->get_rfields(); - for $k ( 0 .. $maximum_field_index ) { - my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); - if ( $k == 0 ) { - $pad += $group_lines[$j]->get_leading_space_count(); - } - - if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } - - } - } -} - -sub get_output_line_number { - - # the output line number reported to a caller is the number of items - # written plus the number of items in the buffer - my $self = shift; - 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); -} - -sub write_leader_and_string { - - my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags ) - = @_; - - # handle outdenting of long lines: - if ($outdent_long_lines) { - my $excess = - length($str) - - $side_comment_length + - $leading_space_count - - $rOpts_maximum_line_length; - if ( $excess > 0 ) { - $leading_space_count = 0; - $last_outdented_line_at = - $file_writer_object->get_output_line_number(); - - unless ($outdented_line_count) { - $first_outdented_line_at = $last_outdented_line_at; - } - $outdented_line_count++; - } - } - - # Make preliminary leading whitespace. It could get changed - # later by entabbing, so we have to keep track of any changes - # to the leading_space_count from here on. - my $leading_string = - $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; - - # Unpack any recombination data; it was packed by - # sub send_lines_to_vertical_aligner. Contents: - # - # [0] type: 1=opening 2=closing 3=opening block brace - # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok - # if closing: spaces of padding to use - # [2] sequence number of container - # [3] valid flag: do not append if this flag is false - # - my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, - $seqno_end ); - if ($rvertical_tightness_flags) { - ( - $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, - $seqno_end - ) = @{$rvertical_tightness_flags}; - } - - $seqno_string = $seqno_end; - - # handle any cached line .. - # either append this line to it or write it out - if ( length($cached_line_text) ) { - - if ( !$cached_line_valid ) { - entab_and_output( $cached_line_text, - $cached_line_leading_space_count, - $last_group_level_written ); - } - - # handle cached line with opening container token - elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { - - my $gap = $leading_space_count - length($cached_line_text); - - # handle option of just one tight opening per line: - if ( $cached_line_flag == 1 ) { - if ( defined($open_or_close) && $open_or_close == 1 ) { - $gap = -1; - } - } - - if ( $gap >= 0 ) { - $leading_string = $cached_line_text . ' ' x $gap; - $leading_space_count = $cached_line_leading_space_count; - $seqno_string = $cached_seqno_string . ':' . $seqno_beg; - } - else { - entab_and_output( $cached_line_text, - $cached_line_leading_space_count, - $last_group_level_written ); - } - } - - # handle cached line to place before this closing container token - else { - my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; - - if ( length($test_line) <= $rOpts_maximum_line_length ) { - - $seqno_string = $cached_seqno_string . ':' . $seqno_beg; - - # Patch to outdent closing tokens ending # in ');' - # If we are joining a line like ');' to a previous stacked - # set of closing tokens, then decide if we may outdent the - # combined stack to the indentation of the ');'. Since we - # should not normally outdent any of the other tokens more than - # the indentation of the lines that contained them, we will - # only do this if all of the corresponding opening - # tokens were on the same line. This can happen with - # -sot and -sct. For example, it is ok here: - # __PACKAGE__->load_components( qw( - # PK::Auto - # Core - # )); - # - # But, for example, we do not outdent in this example because - # that would put the closing sub brace out farther than the - # opening sub brace: - # - # perltidy -sot -sct - # $c->Tk::bind( - # '' => sub { - # my ($c) = @_; - # my $e = $c->XEvent; - # itemsUnderArea $c; - # } ); - # - if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { - - # The way to tell this is if the stacked sequence numbers - # of this output line are the reverse of the stacked - # sequence numbers of the previous non-blank line of - # sequence numbers. So we can join if the previous - # nonblank string of tokens is the mirror image. For - # example if stack )}] is 13:8:6 then we are looking for a - # leading stack like [{( which is 6:8:13 We only need to - # check the two ends, because the intermediate tokens must - # fall in order. Note on speed: having to split on colons - # and eliminate multiple colons might appear to be slow, - # but it's not an issue because we almost never come - # through here. In a typical file we don't. - $seqno_string =~ s/^:+//; - $last_nonblank_seqno_string =~ s/^:+//; - $seqno_string =~ s/:+/:/g; - $last_nonblank_seqno_string =~ s/:+/:/g; - - # how many spaces can we outdent? - my $diff = - $cached_line_leading_space_count - $leading_space_count; - if ( $diff > 0 - && length($seqno_string) - && length($last_nonblank_seqno_string) == - length($seqno_string) ) - { - my @seqno_last = - ( split ':', $last_nonblank_seqno_string ); - my @seqno_now = ( split ':', $seqno_string ); - if ( $seqno_now[-1] == $seqno_last[0] - && $seqno_now[0] == $seqno_last[-1] ) - { - - # OK to outdent .. - # for absolute safety, be sure we only remove - # whitespace - my $ws = substr( $test_line, 0, $diff ); - if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { - - $test_line = substr( $test_line, $diff ); - $cached_line_leading_space_count -= $diff; - } - - # shouldn't happen, but not critical: - ##else { - ## ERROR transferring indentation here - ##} - } - } - } - - $str = $test_line; - $leading_string = ""; - $leading_space_count = $cached_line_leading_space_count; - } - else { - entab_and_output( $cached_line_text, - $cached_line_leading_space_count, - $last_group_level_written ); - } - } - } - $cached_line_type = 0; - $cached_line_text = ""; - - # make the line to be written - my $line = $leading_string . $str; - - # write or cache this line - if ( !$open_or_close || $side_comment_length > 0 ) { - entab_and_output( $line, $leading_space_count, $group_level ); - } - else { - $cached_line_text = $line; - $cached_line_type = $open_or_close; - $cached_line_flag = $tightness_flag; - $cached_seqno = $seqno; - $cached_line_valid = $valid; - $cached_line_leading_space_count = $leading_space_count; - $cached_seqno_string = $seqno_string; - } - - $last_group_level_written = $group_level; - $last_side_comment_length = $side_comment_length; - $extra_indent_ok = 0; -} - -sub entab_and_output { - my ( $line, $leading_space_count, $level ) = @_; - - # The line is currently correct if there is no tabbing (recommended!) - # We may have to lop off some leading spaces and replace with tabs. - if ( $leading_space_count > 0 ) { - - # Nothing to do if no tabs - if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) - || $rOpts_indent_columns <= 0 ) - { - - # nothing to do - } - - # Handle entab option - elsif ($rOpts_entab_leading_whitespace) { - my $space_count = - $leading_space_count % $rOpts_entab_leading_whitespace; - my $tab_count = - int( $leading_space_count / $rOpts_entab_leading_whitespace ); - my $leading_string = "\t" x $tab_count . ' ' x $space_count; - if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { - substr( $line, 0, $leading_space_count ) = $leading_string; - } - else { - - # REMOVE AFTER TESTING - # shouldn't happen - program error counting whitespace - # we'll skip entabbing - warning( -"Error entabbing in entab_and_output: expected count=$leading_space_count\n" - ); - } - } - - # Handle option of one tab per level - else { - my $leading_string = ( "\t" x $level ); - my $space_count = - $leading_space_count - $level * $rOpts_indent_columns; - - # shouldn't happen: - if ( $space_count < 0 ) { - warning( -"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n" - ); - $leading_string = ( ' ' x $leading_space_count ); - } - else { - $leading_string .= ( ' ' x $space_count ); - } - if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { - substr( $line, 0, $leading_space_count ) = $leading_string; - } - else { - - # REMOVE AFTER TESTING - # shouldn't happen - program error counting whitespace - # we'll skip entabbing - warning( -"Error entabbing in entab_and_output: expected count=$leading_space_count\n" - ); - } - } - } - $file_writer_object->write_code_line( $line . "\n" ); - if ($seqno_string) { - $last_nonblank_seqno_string = $seqno_string; - } -} - -{ # begin get_leading_string - - my @leading_string_cache; - - sub get_leading_string { - - # define the leading whitespace string for this line.. - my $leading_whitespace_count = shift; - - # Handle case of zero whitespace, which includes multi-line quotes - # (which may have a finite level; this prevents tab problems) - if ( $leading_whitespace_count <= 0 ) { - return ""; - } - - # look for previous result - elsif ( $leading_string_cache[$leading_whitespace_count] ) { - return $leading_string_cache[$leading_whitespace_count]; - } - - # must compute a string for this number of spaces - my $leading_string; - - # Handle simple case of no tabs - if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) - || $rOpts_indent_columns <= 0 ) - { - $leading_string = ( ' ' x $leading_whitespace_count ); - } - - # Handle entab option - elsif ($rOpts_entab_leading_whitespace) { - my $space_count = - $leading_whitespace_count % $rOpts_entab_leading_whitespace; - my $tab_count = int( - $leading_whitespace_count / $rOpts_entab_leading_whitespace ); - $leading_string = "\t" x $tab_count . ' ' x $space_count; - } - - # Handle option of one tab per level - else { - $leading_string = ( "\t" x $group_level ); - my $space_count = - $leading_whitespace_count - $group_level * $rOpts_indent_columns; - - # shouldn't happen: - if ( $space_count < 0 ) { - warning( -"Error in append_line: for level=$group_level count=$leading_whitespace_count\n" - ); - $leading_string = ( ' ' x $leading_whitespace_count ); - } - else { - $leading_string .= ( ' ' x $space_count ); - } - } - $leading_string_cache[$leading_whitespace_count] = $leading_string; - return $leading_string; - } -} # end get_leading_string - -sub report_anything_unusual { - my $self = shift; - if ( $outdented_line_count > 0 ) { - write_logfile_entry( - "$outdented_line_count long lines were outdented:\n"); - write_logfile_entry( - " First at output line $first_outdented_line_at\n"); - - if ( $outdented_line_count > 1 ) { - write_logfile_entry( - " Last at output line $last_outdented_line_at\n"); - } - write_logfile_entry( - " use -noll to prevent outdenting, -l=n to increase line length\n" - ); - write_logfile_entry("\n"); - } -} - -##################################################################### -# -# the Perl::Tidy::FileWriter class writes the output file -# -##################################################################### - -package Perl::Tidy::FileWriter; - -# Maximum number of little messages; probably need not be changed. -use constant MAX_NAG_MESSAGES => 6; - -sub write_logfile_entry { - my $self = shift; - my $logger_object = $self->{_logger_object}; - if ($logger_object) { - $logger_object->write_logfile_entry(@_); - } -} - -sub new { - my $class = shift; - my ( $line_sink_object, $rOpts, $logger_object ) = @_; - - bless { - _line_sink_object => $line_sink_object, - _logger_object => $logger_object, - _rOpts => $rOpts, - _output_line_number => 1, - _consecutive_blank_lines => 0, - _consecutive_nonblank_lines => 0, - _first_line_length_error => 0, - _max_line_length_error => 0, - _last_line_length_error => 0, - _first_line_length_error_at => 0, - _max_line_length_error_at => 0, - _last_line_length_error_at => 0, - _line_length_error_count => 0, - _max_output_line_length => 0, - _max_output_line_length_at => 0, - }, $class; -} - -sub tee_on { - my $self = shift; - $self->{_line_sink_object}->tee_on(); -} - -sub tee_off { - my $self = shift; - $self->{_line_sink_object}->tee_off(); -} - -sub get_output_line_number { - my $self = shift; - return $self->{_output_line_number}; -} - -sub decrement_output_line_number { - my $self = shift; - $self->{_output_line_number}--; -} - -sub get_consecutive_nonblank_lines { - my $self = shift; - return $self->{_consecutive_nonblank_lines}; -} - -sub reset_consecutive_blank_lines { - my $self = shift; - $self->{_consecutive_blank_lines} = 0; -} - -sub want_blank_line { - my $self = shift; - unless ( $self->{_consecutive_blank_lines} ) { - $self->write_blank_code_line(); - } -} - -sub write_blank_code_line { - my $self = shift; - my $rOpts = $self->{_rOpts}; - return - if ( $self->{_consecutive_blank_lines} >= - $rOpts->{'maximum-consecutive-blank-lines'} ); - $self->{_consecutive_blank_lines}++; - $self->{_consecutive_nonblank_lines} = 0; - $self->write_line("\n"); -} - -sub write_code_line { - my $self = shift; - my $a = shift; - - if ( $a =~ /^\s*$/ ) { - my $rOpts = $self->{_rOpts}; - return - if ( $self->{_consecutive_blank_lines} >= - $rOpts->{'maximum-consecutive-blank-lines'} ); - $self->{_consecutive_blank_lines}++; - $self->{_consecutive_nonblank_lines} = 0; - } - else { - $self->{_consecutive_blank_lines} = 0; - $self->{_consecutive_nonblank_lines}++; - } - $self->write_line($a); -} - -sub write_line { - my $self = shift; - my $a = shift; - - # TODO: go through and see if the test is necessary here - if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } - - $self->{_line_sink_object}->write_line($a); - - # This calculation of excess line length ignores any internal tabs - my $rOpts = $self->{_rOpts}; - my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; - if ( $a =~ /^\t+/g ) { - $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); - } - - # Note that we just incremented output line number to future value - # so we must subtract 1 for current line number - if ( length($a) > 1 + $self->{_max_output_line_length} ) { - $self->{_max_output_line_length} = length($a) - 1; - $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; - } - - if ( $exceed > 0 ) { - my $output_line_number = $self->{_output_line_number}; - $self->{_last_line_length_error} = $exceed; - $self->{_last_line_length_error_at} = $output_line_number - 1; - if ( $self->{_line_length_error_count} == 0 ) { - $self->{_first_line_length_error} = $exceed; - $self->{_first_line_length_error_at} = $output_line_number - 1; - } - - if ( - $self->{_last_line_length_error} > $self->{_max_line_length_error} ) - { - $self->{_max_line_length_error} = $exceed; - $self->{_max_line_length_error_at} = $output_line_number - 1; - } - - if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { - $self->write_logfile_entry( - "Line length exceeded by $exceed characters\n"); - } - $self->{_line_length_error_count}++; - } - -} - -sub report_line_length_errors { - my $self = shift; - my $rOpts = $self->{_rOpts}; - my $line_length_error_count = $self->{_line_length_error_count}; - if ( $line_length_error_count == 0 ) { - $self->write_logfile_entry( - "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); - my $max_output_line_length = $self->{_max_output_line_length}; - my $max_output_line_length_at = $self->{_max_output_line_length_at}; - $self->write_logfile_entry( -" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" - ); - - } - else { - - my $word = ( $line_length_error_count > 1 ) ? "s" : ""; - $self->write_logfile_entry( -"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" - ); - - $word = ( $line_length_error_count > 1 ) ? "First" : ""; - my $first_line_length_error = $self->{_first_line_length_error}; - my $first_line_length_error_at = $self->{_first_line_length_error_at}; - $self->write_logfile_entry( -" $word at line $first_line_length_error_at by $first_line_length_error characters\n" - ); - - if ( $line_length_error_count > 1 ) { - my $max_line_length_error = $self->{_max_line_length_error}; - my $max_line_length_error_at = $self->{_max_line_length_error_at}; - my $last_line_length_error = $self->{_last_line_length_error}; - my $last_line_length_error_at = $self->{_last_line_length_error_at}; - $self->write_logfile_entry( -" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" - ); - $self->write_logfile_entry( -" Last at line $last_line_length_error_at by $last_line_length_error characters\n" - ); - } - } -} - -##################################################################### -# -# The Perl::Tidy::Debugger class shows line tokenization -# -##################################################################### - -package Perl::Tidy::Debugger; - -sub new { - - my ( $class, $filename ) = @_; - - bless { - _debug_file => $filename, - _debug_file_opened => 0, - _fh => undef, - }, $class; -} - -sub really_open_debug_file { - - my $self = shift; - my $debug_file = $self->{_debug_file}; - my $fh; - unless ( $fh = IO::File->new("> $debug_file") ) { - warn("can't open $debug_file: $!\n"); - } - $self->{_debug_file_opened} = 1; - $self->{_fh} = $fh; - print $fh - "Use -dump-token-types (-dtt) to get a list of token type codes\n"; -} - -sub close_debug_file { - - my $self = shift; - my $fh = $self->{_fh}; - if ( $self->{_debug_file_opened} ) { - - eval { $self->{_fh}->close() }; - } -} - -sub write_debug_entry { - - # This is a debug dump routine which may be modified as necessary - # to dump tokens on a line-by-line basis. The output will be written - # to the .DEBUG file when the -D flag is entered. - my $self = shift; - my $line_of_tokens = shift; - - my $input_line = $line_of_tokens->{_line_text}; - my $rtoken_type = $line_of_tokens->{_rtoken_type}; - my $rtokens = $line_of_tokens->{_rtokens}; - my $rlevels = $line_of_tokens->{_rlevels}; - my $rslevels = $line_of_tokens->{_rslevels}; - my $rblock_type = $line_of_tokens->{_rblock_type}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $line_type = $line_of_tokens->{_line_type}; - - my ( $j, $num ); - - my $token_str = "$input_line_number: "; - my $reconstructed_original = "$input_line_number: "; - my $block_str = "$input_line_number: "; - - #$token_str .= "$line_type: "; - #$reconstructed_original .= "$line_type: "; - - my $pattern = ""; - my @next_char = ( '"', '"' ); - my $i_next = 0; - unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } - my $fh = $self->{_fh}; - - for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { - - # testing patterns - if ( $$rtoken_type[$j] eq 'k' ) { - $pattern .= $$rtokens[$j]; - } - else { - $pattern .= $$rtoken_type[$j]; - } - $reconstructed_original .= $$rtokens[$j]; - $block_str .= "($$rblock_type[$j])"; - $num = length( $$rtokens[$j] ); - my $type_str = $$rtoken_type[$j]; - - # be sure there are no blank tokens (shouldn't happen) - # This can only happen if a programming error has been made - # because all valid tokens are non-blank - if ( $type_str eq ' ' ) { - print $fh "BLANK TOKEN on the next line\n"; - $type_str = $next_char[$i_next]; - $i_next = 1 - $i_next; - } - - if ( length($type_str) == 1 ) { - $type_str = $type_str x $num; - } - $token_str .= $type_str; - } - - # Write what you want here ... - # print $fh "$input_line\n"; - # print $fh "$pattern\n"; - print $fh "$reconstructed_original\n"; - print $fh "$token_str\n"; - - #print $fh "$block_str\n"; -} - -##################################################################### -# -# The Perl::Tidy::LineBuffer class supplies a 'get_line()' -# method for returning the next line to be parsed, as well as a -# 'peek_ahead()' method -# -# The input parameter is an object with a 'get_line()' method -# which returns the next line to be parsed -# -##################################################################### - -package Perl::Tidy::LineBuffer; - -sub new { - - my $class = shift; - my $line_source_object = shift; - - return bless { - _line_source_object => $line_source_object, - _rlookahead_buffer => [], - }, $class; -} - -sub peek_ahead { - my $self = shift; - my $buffer_index = shift; - my $line = undef; - my $line_source_object = $self->{_line_source_object}; - my $rlookahead_buffer = $self->{_rlookahead_buffer}; - if ( $buffer_index < scalar(@$rlookahead_buffer) ) { - $line = $$rlookahead_buffer[$buffer_index]; - } - else { - $line = $line_source_object->get_line(); - push( @$rlookahead_buffer, $line ); - } - return $line; -} - -sub get_line { - my $self = shift; - my $line = undef; - my $line_source_object = $self->{_line_source_object}; - my $rlookahead_buffer = $self->{_rlookahead_buffer}; - - if ( scalar(@$rlookahead_buffer) ) { - $line = shift @$rlookahead_buffer; - } - else { - $line = $line_source_object->get_line(); - } - return $line; -} - -######################################################################## -# -# the Perl::Tidy::Tokenizer package is essentially a filter which -# reads lines of perl source code from a source object and provides -# corresponding tokenized lines through its get_line() method. Lines -# flow from the source_object to the caller like this: -# -# source_object --> LineBuffer_object --> Tokenizer --> calling routine -# get_line() get_line() get_line() line_of_tokens -# -# The source object can be any object with a get_line() method which -# supplies one line (a character string) perl call. -# The LineBuffer object is created by the Tokenizer. -# The Tokenizer returns a reference to a data structure 'line_of_tokens' -# containing one tokenized line for each call to its get_line() method. -# -# WARNING: This is not a real class yet. Only one tokenizer my be used. -# -######################################################################## - -package Perl::Tidy::Tokenizer; - -BEGIN { - - # Caution: these debug flags produce a lot of output - # They should all be 0 except when debugging small scripts - - use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; - use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; - use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; - use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; - use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; - - my $debug_warning = sub { - print "TOKENIZER_DEBUGGING with key $_[0]\n"; - }; - - TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); - TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); - TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); - TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); - TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); - -} - -use Carp; - -# PACKAGE VARIABLES for for processing an entire FILE. -use vars qw{ - $tokenizer_self - - $last_nonblank_token - $last_nonblank_type - $last_nonblank_block_type - $statement_type - $in_attribute_list - $current_package - $context - - %is_constant - %is_user_function - %user_function_prototype - %is_block_function - %is_block_list_function - %saw_function_definition - - $brace_depth - $paren_depth - $square_bracket_depth - - @current_depth - @total_depth - $total_depth - @nesting_sequence_number - @current_sequence_number - @paren_type - @paren_semicolon_count - @paren_structural_type - @brace_type - @brace_structural_type - @brace_statement_type - @brace_context - @brace_package - @square_bracket_type - @square_bracket_structural_type - @depth_array - @nested_ternary_flag - @starting_line_of_current_depth -}; - -# GLOBAL CONSTANTS for routines in this package -use vars qw{ - %is_indirect_object_taker - %is_block_operator - %expecting_operator_token - %expecting_operator_types - %expecting_term_types - %expecting_term_token - %is_digraph - %is_file_test_operator - %is_trigraph - %is_valid_token_type - %is_keyword - %is_code_block_token - %really_want_term - @opening_brace_names - @closing_brace_names - %is_keyword_taking_list - %is_q_qq_qw_qx_qr_s_y_tr_m -}; - -# possible values of operator_expected() -use constant TERM => -1; -use constant UNKNOWN => 0; -use constant OPERATOR => 1; - -# possible values of context -use constant SCALAR_CONTEXT => -1; -use constant UNKNOWN_CONTEXT => 0; -use constant LIST_CONTEXT => 1; - -# Maximum number of little messages; probably need not be changed. -use constant MAX_NAG_MESSAGES => 6; - -{ - - # methods to count instances - my $_count = 0; - sub get_count { $_count; } - sub _increment_count { ++$_count } - sub _decrement_count { --$_count } -} - -sub DESTROY { - $_[0]->_decrement_count(); -} - -sub new { - - my $class = shift; - - # Note: 'tabs' and 'indent_columns' are temporary and should be - # removed asap - my %defaults = ( - source_object => undef, - debugger_object => undef, - diagnostics_object => undef, - logger_object => undef, - starting_level => undef, - indent_columns => 4, - tabs => 0, - look_for_hash_bang => 0, - trim_qw => 1, - look_for_autoloader => 1, - look_for_selfloader => 1, - starting_line_number => 1, - ); - my %args = ( %defaults, @_ ); - - # we are given an object with a get_line() method to supply source lines - my $source_object = $args{source_object}; - - # we create another object with a get_line() and peek_ahead() method - my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); - - # Tokenizer state data is as follows: - # _rhere_target_list reference to list of here-doc targets - # _here_doc_target the target string for a here document - # _here_quote_character the type of here-doc quoting (" ' ` or none) - # to determine if interpolation is done - # _quote_target character we seek if chasing a quote - # _line_start_quote line where we started looking for a long quote - # _in_here_doc flag indicating if we are in a here-doc - # _in_pod flag set if we are in pod documentation - # _in_error flag set if we saw severe error (binary in script) - # _in_data flag set if we are in __DATA__ section - # _in_end flag set if we are in __END__ section - # _in_format flag set if we are in a format description - # _in_attribute_list flag telling if we are looking for attributes - # _in_quote flag telling if we are chasing a quote - # _starting_level indentation level of first line - # _input_tabstr string denoting one indentation level of input file - # _know_input_tabstr flag indicating if we know _input_tabstr - # _line_buffer_object object with get_line() method to supply source code - # _diagnostics_object place to write debugging information - # _unexpected_error_count error count used to limit output - # _lower_case_labels_at line numbers where lower case labels seen - $tokenizer_self = { - _rhere_target_list => [], - _in_here_doc => 0, - _here_doc_target => "", - _here_quote_character => "", - _in_data => 0, - _in_end => 0, - _in_format => 0, - _in_error => 0, - _in_pod => 0, - _in_attribute_list => 0, - _in_quote => 0, - _quote_target => "", - _line_start_quote => -1, - _starting_level => $args{starting_level}, - _know_starting_level => defined( $args{starting_level} ), - _tabs => $args{tabs}, - _indent_columns => $args{indent_columns}, - _look_for_hash_bang => $args{look_for_hash_bang}, - _trim_qw => $args{trim_qw}, - _input_tabstr => "", - _know_input_tabstr => -1, - _last_line_number => $args{starting_line_number} - 1, - _saw_perl_dash_P => 0, - _saw_perl_dash_w => 0, - _saw_use_strict => 0, - _saw_v_string => 0, - _look_for_autoloader => $args{look_for_autoloader}, - _look_for_selfloader => $args{look_for_selfloader}, - _saw_autoloader => 0, - _saw_selfloader => 0, - _saw_hash_bang => 0, - _saw_end => 0, - _saw_data => 0, - _saw_negative_indentation => 0, - _started_tokenizing => 0, - _line_buffer_object => $line_buffer_object, - _debugger_object => $args{debugger_object}, - _diagnostics_object => $args{diagnostics_object}, - _logger_object => $args{logger_object}, - _unexpected_error_count => 0, - _started_looking_for_here_target_at => 0, - _nearly_matched_here_target_at => undef, - _line_text => "", - _rlower_case_labels_at => undef, - }; - - prepare_for_a_new_file(); - find_starting_indentation_level(); - - bless $tokenizer_self, $class; - - # This is not a full class yet, so die if an attempt is made to - # create more than one object. - - if ( _increment_count() > 1 ) { - confess -"Attempt to create more than 1 object in $class, which is not a true class yet\n"; - } - - return $tokenizer_self; - -} - -# interface to Perl::Tidy::Logger routines -sub warning { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->warning(@_); - } -} - -sub complain { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->complain(@_); - } -} - -sub write_logfile_entry { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->write_logfile_entry(@_); - } -} - -sub interrupt_logfile { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->interrupt_logfile(); - } -} - -sub resume_logfile { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->resume_logfile(); - } -} - -sub increment_brace_error { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->increment_brace_error(); - } -} - -sub report_definite_bug { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->report_definite_bug(); - } -} - -sub brace_warning { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->brace_warning(@_); - } -} - -sub get_saw_brace_error { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->get_saw_brace_error(); - } - else { - 0; - } -} - -# interface to Perl::Tidy::Diagnostics routines -sub write_diagnostics { - if ( $tokenizer_self->{_diagnostics_object} ) { - $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); - } -} - -sub report_tokenization_errors { - - my $self = shift; - - my $level = get_indentation_level(); - if ( $level != $tokenizer_self->{_starting_level} ) { - warning("final indentation level: $level\n"); - } - - check_final_nesting_depths(); - - if ( $tokenizer_self->{_look_for_hash_bang} - && !$tokenizer_self->{_saw_hash_bang} ) - { - warning( - "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); - } - - if ( $tokenizer_self->{_in_format} ) { - warning("hit EOF while in format description\n"); - } - - if ( $tokenizer_self->{_in_pod} ) { - - # Just write log entry if this is after __END__ or __DATA__ - # because this happens to often, and it is not likely to be - # a parsing error. - if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { - write_logfile_entry( -"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" - ); - } - - else { - complain( -"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" - ); - } - - } - - if ( $tokenizer_self->{_in_here_doc} ) { - my $here_doc_target = $tokenizer_self->{_here_doc_target}; - my $started_looking_for_here_target_at = - $tokenizer_self->{_started_looking_for_here_target_at}; - if ($here_doc_target) { - warning( -"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" - ); - } - else { - warning( -"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" - ); - } - my $nearly_matched_here_target_at = - $tokenizer_self->{_nearly_matched_here_target_at}; - if ($nearly_matched_here_target_at) { - warning( -"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" - ); - } - } - - if ( $tokenizer_self->{_in_quote} ) { - my $line_start_quote = $tokenizer_self->{_line_start_quote}; - my $quote_target = $tokenizer_self->{_quote_target}; - my $what = - ( $tokenizer_self->{_in_attribute_list} ) - ? "attribute list" - : "quote/pattern"; - warning( -"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" - ); - } - - unless ( $tokenizer_self->{_saw_perl_dash_w} ) { - if ( $] < 5.006 ) { - write_logfile_entry("Suggest including '-w parameter'\n"); - } - else { - write_logfile_entry("Suggest including 'use warnings;'\n"); - } - } - - if ( $tokenizer_self->{_saw_perl_dash_P} ) { - write_logfile_entry("Use of -P parameter for defines is discouraged\n"); - } - - unless ( $tokenizer_self->{_saw_use_strict} ) { - write_logfile_entry("Suggest including 'use strict;'\n"); - } - - # it is suggested that lables have at least one upper case character - # for legibility and to avoid code breakage as new keywords are introduced - if ( $tokenizer_self->{_rlower_case_labels_at} ) { - my @lower_case_labels_at = - @{ $tokenizer_self->{_rlower_case_labels_at} }; - write_logfile_entry( - "Suggest using upper case characters in label(s)\n"); - local $" = ')('; - write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); - } -} - -sub report_v_string { - - # warn if this version can't handle v-strings - my $tok = shift; - unless ( $tokenizer_self->{_saw_v_string} ) { - $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; - } - if ( $] < 5.006 ) { - warning( -"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" - ); - } -} - -sub get_input_line_number { - return $tokenizer_self->{_last_line_number}; -} - -# returns the next tokenized line -sub get_line { - - my $self = shift; - - # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, - # $square_bracket_depth, $paren_depth - - my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); - $tokenizer_self->{_line_text} = $input_line; - - return undef unless ($input_line); - - my $input_line_number = ++$tokenizer_self->{_last_line_number}; - - # Find and remove what characters terminate this line, including any - # control r - my $input_line_separator = ""; - if ( chomp($input_line) ) { $input_line_separator = $/ } - - # TODO: what other characters should be included here? - if ( $input_line =~ s/((\r|\035|\032)+)$// ) { - $input_line_separator = $2 . $input_line_separator; - } - - # for backwards compatability we keep the line text terminated with - # a newline character - $input_line .= "\n"; - $tokenizer_self->{_line_text} = $input_line; # update - - # create a data structure describing this line which will be - # returned to the caller. - - # _line_type codes are: - # SYSTEM - system-specific code before hash-bang line - # CODE - line of perl code (including comments) - # POD_START - line starting pod, such as '=head' - # POD - pod documentation text - # POD_END - last line of pod section, '=cut' - # HERE - text of here-document - # HERE_END - last line of here-doc (target word) - # FORMAT - format section - # FORMAT_END - last line of format section, '.' - # DATA_START - __DATA__ line - # DATA - unidentified text following __DATA__ - # END_START - __END__ line - # END - unidentified text following __END__ - # ERROR - we are in big trouble, probably not a perl script - - # Other variables: - # _curly_brace_depth - depth of curly braces at start of line - # _square_bracket_depth - depth of square brackets at start of line - # _paren_depth - depth of parens at start of line - # _starting_in_quote - this line continues a multi-line quote - # (so don't trim leading blanks!) - # _ending_in_quote - this line ends in a multi-line quote - # (so don't trim trailing blanks!) - my $line_of_tokens = { - _line_type => 'EOF', - _line_text => $input_line, - _line_number => $input_line_number, - _rtoken_type => undef, - _rtokens => undef, - _rlevels => undef, - _rslevels => undef, - _rblock_type => undef, - _rcontainer_type => undef, - _rcontainer_environment => undef, - _rtype_sequence => undef, - _rnesting_tokens => undef, - _rci_levels => undef, - _rnesting_blocks => undef, - _python_indentation_level => -1, ## 0, - _starting_in_quote => 0, # to be set by subroutine - _ending_in_quote => 0, - _curly_brace_depth => $brace_depth, - _square_bracket_depth => $square_bracket_depth, - _paren_depth => $paren_depth, - _quote_character => '', - }; - - # must print line unchanged if we are in a here document - if ( $tokenizer_self->{_in_here_doc} ) { - - $line_of_tokens->{_line_type} = 'HERE'; - my $here_doc_target = $tokenizer_self->{_here_doc_target}; - my $here_quote_character = $tokenizer_self->{_here_quote_character}; - my $candidate_target = $input_line; - chomp $candidate_target; - if ( $candidate_target eq $here_doc_target ) { - $tokenizer_self->{_nearly_matched_here_target_at} = undef; - $line_of_tokens->{_line_type} = 'HERE_END'; - write_logfile_entry("Exiting HERE document $here_doc_target\n"); - - my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; - if (@$rhere_target_list) { # there can be multiple here targets - ( $here_doc_target, $here_quote_character ) = - @{ shift @$rhere_target_list }; - $tokenizer_self->{_here_doc_target} = $here_doc_target; - $tokenizer_self->{_here_quote_character} = - $here_quote_character; - write_logfile_entry( - "Entering HERE document $here_doc_target\n"); - $tokenizer_self->{_nearly_matched_here_target_at} = undef; - $tokenizer_self->{_started_looking_for_here_target_at} = - $input_line_number; - } - else { - $tokenizer_self->{_in_here_doc} = 0; - $tokenizer_self->{_here_doc_target} = ""; - $tokenizer_self->{_here_quote_character} = ""; - } - } - - # check for error of extra whitespace - # note for PERL6: leading whitespace is allowed - else { - $candidate_target =~ s/\s*$//; - $candidate_target =~ s/^\s*//; - if ( $candidate_target eq $here_doc_target ) { - $tokenizer_self->{_nearly_matched_here_target_at} = - $input_line_number; - } - } - return $line_of_tokens; - } - - # must print line unchanged if we are in a format section - elsif ( $tokenizer_self->{_in_format} ) { - - if ( $input_line =~ /^\.[\s#]*$/ ) { - write_logfile_entry("Exiting format section\n"); - $tokenizer_self->{_in_format} = 0; - $line_of_tokens->{_line_type} = 'FORMAT_END'; - } - else { - $line_of_tokens->{_line_type} = 'FORMAT'; - } - return $line_of_tokens; - } - - # must print line unchanged if we are in pod documentation - elsif ( $tokenizer_self->{_in_pod} ) { - - $line_of_tokens->{_line_type} = 'POD'; - if ( $input_line =~ /^=cut/ ) { - $line_of_tokens->{_line_type} = 'POD_END'; - write_logfile_entry("Exiting POD section\n"); - $tokenizer_self->{_in_pod} = 0; - } - if ( $input_line =~ /^\#\!.*perl\b/ ) { - warning( - "Hash-bang in pod can cause older versions of perl to fail! \n" - ); - } - - return $line_of_tokens; - } - - # must print line unchanged if we have seen a severe error (i.e., we - # are seeing illegal tokens and connot continue. Syntax errors do - # not pass this route). Calling routine can decide what to do, but - # the default can be to just pass all lines as if they were after __END__ - elsif ( $tokenizer_self->{_in_error} ) { - $line_of_tokens->{_line_type} = 'ERROR'; - return $line_of_tokens; - } - - # print line unchanged if we are __DATA__ section - elsif ( $tokenizer_self->{_in_data} ) { - - # ...but look for POD - # Note that the _in_data and _in_end flags remain set - # so that we return to that state after seeing the - # end of a pod section - if ( $input_line =~ /^=(?!cut)/ ) { - $line_of_tokens->{_line_type} = 'POD_START'; - write_logfile_entry("Entering POD section\n"); - $tokenizer_self->{_in_pod} = 1; - return $line_of_tokens; - } - else { - $line_of_tokens->{_line_type} = 'DATA'; - return $line_of_tokens; - } - } - - # print line unchanged if we are in __END__ section - elsif ( $tokenizer_self->{_in_end} ) { - - # ...but look for POD - # Note that the _in_data and _in_end flags remain set - # so that we return to that state after seeing the - # end of a pod section - if ( $input_line =~ /^=(?!cut)/ ) { - $line_of_tokens->{_line_type} = 'POD_START'; - write_logfile_entry("Entering POD section\n"); - $tokenizer_self->{_in_pod} = 1; - return $line_of_tokens; - } - else { - $line_of_tokens->{_line_type} = 'END'; - return $line_of_tokens; - } - } - - # check for a hash-bang line if we haven't seen one - if ( !$tokenizer_self->{_saw_hash_bang} ) { - if ( $input_line =~ /^\#\!.*perl\b/ ) { - $tokenizer_self->{_saw_hash_bang} = $input_line_number; - - # check for -w and -P flags - if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { - $tokenizer_self->{_saw_perl_dash_P} = 1; - } - - if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { - $tokenizer_self->{_saw_perl_dash_w} = 1; - } - - if ( ( $input_line_number > 1 ) - && ( !$tokenizer_self->{_look_for_hash_bang} ) ) - { - - # this is helpful for VMS systems; we may have accidentally - # tokenized some DCL commands - if ( $tokenizer_self->{_started_tokenizing} ) { - warning( -"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" - ); - } - else { - complain("Useless hash-bang after line 1\n"); - } - } - - # Report the leading hash-bang as a system line - # This will prevent -dac from deleting it - else { - $line_of_tokens->{_line_type} = 'SYSTEM'; - return $line_of_tokens; - } - } - } - - # wait for a hash-bang before parsing if the user invoked us with -x - if ( $tokenizer_self->{_look_for_hash_bang} - && !$tokenizer_self->{_saw_hash_bang} ) - { - $line_of_tokens->{_line_type} = 'SYSTEM'; - return $line_of_tokens; - } - - # a first line of the form ': #' will be marked as SYSTEM - # since lines of this form may be used by tcsh - if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { - $line_of_tokens->{_line_type} = 'SYSTEM'; - return $line_of_tokens; - } - - # now we know that it is ok to tokenize the line... - # the line tokenizer will modify any of these private variables: - # _rhere_target_list - # _in_data - # _in_end - # _in_format - # _in_error - # _in_pod - # _in_quote - my $ending_in_quote_last = $tokenizer_self->{_in_quote}; - tokenize_this_line($line_of_tokens); - - # Now finish defining the return structure and return it - $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; - - # handle severe error (binary data in script) - if ( $tokenizer_self->{_in_error} ) { - $tokenizer_self->{_in_quote} = 0; # to avoid any more messages - warning("Giving up after error\n"); - $line_of_tokens->{_line_type} = 'ERROR'; - reset_indentation_level(0); # avoid error messages - return $line_of_tokens; - } - - # handle start of pod documentation - if ( $tokenizer_self->{_in_pod} ) { - - # This gets tricky..above a __DATA__ or __END__ section, perl - # accepts '=cut' as the start of pod section. But afterwards, - # only pod utilities see it and they may ignore an =cut without - # leading =head. In any case, this isn't good. - if ( $input_line =~ /^=cut\b/ ) { - if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { - complain("=cut while not in pod ignored\n"); - $tokenizer_self->{_in_pod} = 0; - $line_of_tokens->{_line_type} = 'POD_END'; - } - else { - $line_of_tokens->{_line_type} = 'POD_START'; - complain( -"=cut starts a pod section .. this can fool pod utilities.\n" - ); - write_logfile_entry("Entering POD section\n"); - } - } - - else { - $line_of_tokens->{_line_type} = 'POD_START'; - write_logfile_entry("Entering POD section\n"); - } - - return $line_of_tokens; - } - - # update indentation levels for log messages - if ( $input_line !~ /^\s*$/ ) { - my $rlevels = $line_of_tokens->{_rlevels}; - my $structural_indentation_level = $$rlevels[0]; - my ( $python_indentation_level, $msg ) = - find_indentation_level( $input_line, $structural_indentation_level ); - if ($msg) { write_logfile_entry("$msg") } - if ( $tokenizer_self->{_know_input_tabstr} == 1 ) { - $line_of_tokens->{_python_indentation_level} = - $python_indentation_level; - } - } - - # see if this line contains here doc targets - my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; - if (@$rhere_target_list) { - - my ( $here_doc_target, $here_quote_character ) = - @{ shift @$rhere_target_list }; - $tokenizer_self->{_in_here_doc} = 1; - $tokenizer_self->{_here_doc_target} = $here_doc_target; - $tokenizer_self->{_here_quote_character} = $here_quote_character; - write_logfile_entry("Entering HERE document $here_doc_target\n"); - $tokenizer_self->{_started_looking_for_here_target_at} = - $input_line_number; - } - - # NOTE: __END__ and __DATA__ statements are written unformatted - # because they can theoretically contain additional characters - # which are not tokenized (and cannot be read with either!). - if ( $tokenizer_self->{_in_data} ) { - $line_of_tokens->{_line_type} = 'DATA_START'; - write_logfile_entry("Starting __DATA__ section\n"); - $tokenizer_self->{_saw_data} = 1; - - # keep parsing after __DATA__ if use SelfLoader was seen - if ( $tokenizer_self->{_saw_selfloader} ) { - $tokenizer_self->{_in_data} = 0; - write_logfile_entry( - "SelfLoader seen, continuing; -nlsl deactivates\n"); - } - - return $line_of_tokens; - } - - elsif ( $tokenizer_self->{_in_end} ) { - $line_of_tokens->{_line_type} = 'END_START'; - write_logfile_entry("Starting __END__ section\n"); - $tokenizer_self->{_saw_end} = 1; - - # keep parsing after __END__ if use AutoLoader was seen - if ( $tokenizer_self->{_saw_autoloader} ) { - $tokenizer_self->{_in_end} = 0; - write_logfile_entry( - "AutoLoader seen, continuing; -nlal deactivates\n"); - } - return $line_of_tokens; - } - - # now, finally, we know that this line is type 'CODE' - $line_of_tokens->{_line_type} = 'CODE'; - - # remember if we have seen any real code - if ( !$tokenizer_self->{_started_tokenizing} - && $input_line !~ /^\s*$/ - && $input_line !~ /^\s*#/ ) - { - $tokenizer_self->{_started_tokenizing} = 1; - } - - if ( $tokenizer_self->{_debugger_object} ) { - $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); - } - - # Note: if keyword 'format' occurs in this line code, it is still CODE - # (keyword 'format' need not start a line) - if ( $tokenizer_self->{_in_format} ) { - write_logfile_entry("Entering format section\n"); - } - - if ( $tokenizer_self->{_in_quote} - and ( $tokenizer_self->{_line_start_quote} < 0 ) ) - { - - #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { - if ( - ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) - { - $tokenizer_self->{_line_start_quote} = $input_line_number; - write_logfile_entry( - "Start multi-line quote or pattern ending in $quote_target\n"); - } - } - elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) - and !$tokenizer_self->{_in_quote} ) - { - $tokenizer_self->{_line_start_quote} = -1; - write_logfile_entry("End of multi-line quote or pattern\n"); - } - - # we are returning a line of CODE - return $line_of_tokens; -} - -sub find_starting_indentation_level { - - # USES GLOBAL VARIABLES: $tokenizer_self - my $starting_level = 0; - my $know_input_tabstr = -1; # flag for find_indentation_level - - # use value if given as parameter - if ( $tokenizer_self->{_know_starting_level} ) { - $starting_level = $tokenizer_self->{_starting_level}; - } - - # if we know there is a hash_bang line, the level must be zero - elsif ( $tokenizer_self->{_look_for_hash_bang} ) { - $tokenizer_self->{_know_starting_level} = 1; - } - - # otherwise figure it out from the input file - else { - my $line; - my $i = 0; - my $structural_indentation_level = -1; # flag for find_indentation_level - - my $msg = ""; - while ( $line = - $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) - { - - # if first line is #! then assume starting level is zero - if ( $i == 1 && $line =~ /^\#\!/ ) { - $starting_level = 0; - last; - } - next if ( $line =~ /^\s*#/ ); # must not be comment - next if ( $line =~ /^\s*$/ ); # must not be blank - ( $starting_level, $msg ) = - find_indentation_level( $line, $structural_indentation_level ); - if ($msg) { write_logfile_entry("$msg") } - last; - } - $msg = "Line $i implies starting-indentation-level = $starting_level\n"; - - if ( $starting_level > 0 ) { - - my $input_tabstr = $tokenizer_self->{_input_tabstr}; - if ( $input_tabstr eq "\t" ) { - $msg .= "by guessing input tabbing uses 1 tab per level\n"; - } - else { - my $cols = length($input_tabstr); - $msg .= - "by guessing input tabbing uses $cols blanks per level\n"; - } - } - write_logfile_entry("$msg"); - } - $tokenizer_self->{_starting_level} = $starting_level; - reset_indentation_level($starting_level); -} - -# Find indentation level given a input line. At the same time, try to -# figure out the input tabbing scheme. -# -# There are two types of calls: -# -# Type 1: $structural_indentation_level < 0 -# In this case we have to guess $input_tabstr to figure out the level. -# -# Type 2: $structural_indentation_level >= 0 -# In this case the level of this line is known, and this routine can -# update the tabbing string, if still unknown, to make the level correct. - -sub find_indentation_level { - my ( $line, $structural_indentation_level ) = @_; - - # USES GLOBAL VARIABLES: $tokenizer_self - my $level = 0; - my $msg = ""; - - my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr}; - my $input_tabstr = $tokenizer_self->{_input_tabstr}; - - # find leading whitespace - my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : ""; - - # make first guess at input tabbing scheme if necessary - if ( $know_input_tabstr < 0 ) { - - $know_input_tabstr = 0; - - if ( $tokenizer_self->{_tabs} ) { - $input_tabstr = "\t"; - if ( length($leading_whitespace) > 0 ) { - if ( $leading_whitespace !~ /\t/ ) { - - my $cols = $tokenizer_self->{_indent_columns}; - - if ( length($leading_whitespace) < $cols ) { - $cols = length($leading_whitespace); - } - $input_tabstr = " " x $cols; - } - } - } - else { - $input_tabstr = " " x $tokenizer_self->{_indent_columns}; - - if ( length($leading_whitespace) > 0 ) { - if ( $leading_whitespace =~ /^\t/ ) { - $input_tabstr = "\t"; - } - } - } - $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; - $tokenizer_self->{_input_tabstr} = $input_tabstr; - } - - # determine the input tabbing scheme if possible - if ( ( $know_input_tabstr == 0 ) - && ( length($leading_whitespace) > 0 ) - && ( $structural_indentation_level > 0 ) ) - { - my $saved_input_tabstr = $input_tabstr; - - # check for common case of one tab per indentation level - if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { - if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { - $input_tabstr = "\t"; - $msg = "Guessing old indentation was tab character\n"; - } - } - - else { - - # detab any tabs based on 8 blanks per tab - my $entabbed = ""; - if ( $leading_whitespace =~ s/^\t+/ /g ) { - $entabbed = "entabbed"; - } - - # now compute tabbing from number of spaces - my $columns = - length($leading_whitespace) / $structural_indentation_level; - if ( $columns == int $columns ) { - $msg = - "Guessing old indentation was $columns $entabbed spaces\n"; - } - else { - $columns = int $columns; - $msg = -"old indentation is unclear, using $columns $entabbed spaces\n"; - } - $input_tabstr = " " x $columns; - } - $know_input_tabstr = 1; - $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; - $tokenizer_self->{_input_tabstr} = $input_tabstr; - - # see if mistakes were made - if ( ( $tokenizer_self->{_starting_level} > 0 ) - && !$tokenizer_self->{_know_starting_level} ) - { - - if ( $input_tabstr ne $saved_input_tabstr ) { - complain( -"I made a bad starting level guess; rerun with a value for -sil \n" - ); - } - } - } - - # use current guess at input tabbing to get input indentation level - # - # Patch to handle a common case of entabbed leading whitespace - # If the leading whitespace equals 4 spaces and we also have - # tabs, detab the input whitespace assuming 8 spaces per tab. - if ( length($input_tabstr) == 4 ) { - $leading_whitespace =~ s/^\t+/ /g; - } - - if ( ( my $len_tab = length($input_tabstr) ) > 0 ) { - my $pos = 0; - - while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr ) - { - $pos += $len_tab; - $level++; - } - } - return ( $level, $msg ); -} - -# This is a currently unused debug routine -sub dump_functions { - - my $fh = *STDOUT; - my ( $pkg, $sub ); - foreach $pkg ( keys %is_user_function ) { - print $fh "\nnon-constant subs in package $pkg\n"; - - foreach $sub ( keys %{ $is_user_function{$pkg} } ) { - my $msg = ""; - if ( $is_block_list_function{$pkg}{$sub} ) { - $msg = 'block_list'; - } - - if ( $is_block_function{$pkg}{$sub} ) { - $msg = 'block'; - } - print $fh "$sub $msg\n"; - } - } - - foreach $pkg ( keys %is_constant ) { - print $fh "\nconstants and constant subs in package $pkg\n"; - - foreach $sub ( keys %{ $is_constant{$pkg} } ) { - print $fh "$sub\n"; - } - } -} - -sub ones_count { - - # count number of 1's in a string of 1's and 0's - # example: ones_count("010101010101") gives 6 - return ( my $cis = $_[0] ) =~ tr/1/0/; -} - -sub prepare_for_a_new_file { - - # previous tokens needed to determine what to expect next - $last_nonblank_token = ';'; # the only possible starting state which - $last_nonblank_type = ';'; # will make a leading brace a code block - $last_nonblank_block_type = ''; - - # scalars for remembering statement types across multiple lines - $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' - $in_attribute_list = 0; - - # scalars for remembering where we are in the file - $current_package = "main"; - $context = UNKNOWN_CONTEXT; - - # hashes used to remember function information - %is_constant = (); # user-defined constants - %is_user_function = (); # user-defined functions - %user_function_prototype = (); # their prototypes - %is_block_function = (); - %is_block_list_function = (); - %saw_function_definition = (); - - # variables used to track depths of various containers - # and report nesting errors - $paren_depth = 0; - $brace_depth = 0; - $square_bracket_depth = 0; - @current_depth[ 0 .. $#closing_brace_names ] = - (0) x scalar @closing_brace_names; - $total_depth = 0; - @total_depth = (); - @nesting_sequence_number[ 0 .. $#closing_brace_names ] = - ( 0 .. $#closing_brace_names ); - @current_sequence_number = (); - $paren_type[$paren_depth] = ''; - $paren_semicolon_count[$paren_depth] = 0; - $paren_structural_type[$brace_depth] = ''; - $brace_type[$brace_depth] = ';'; # identify opening brace as code block - $brace_structural_type[$brace_depth] = ''; - $brace_statement_type[$brace_depth] = ""; - $brace_context[$brace_depth] = UNKNOWN_CONTEXT; - $brace_package[$paren_depth] = $current_package; - $square_bracket_type[$square_bracket_depth] = ''; - $square_bracket_structural_type[$square_bracket_depth] = ''; - - initialize_tokenizer_state(); -} - -{ # begin tokenize_this_line - - use constant BRACE => 0; - use constant SQUARE_BRACKET => 1; - use constant PAREN => 2; - use constant QUESTION_COLON => 3; - - # TV1: scalars for processing one LINE. - # Re-initialized on each entry to sub tokenize_this_line. - my ( - $block_type, $container_type, $expecting, - $i, $i_tok, $input_line, - $input_line_number, $last_nonblank_i, $max_token_index, - $next_tok, $next_type, $peeked_ahead, - $prototype, $rhere_target_list, $rtoken_map, - $rtoken_type, $rtokens, $tok, - $type, $type_sequence, $indent_flag, - ); - - # TV2: refs to ARRAYS for processing one LINE - # Re-initialized on each call. - my $routput_token_list = []; # stack of output token indexes - my $routput_token_type = []; # token types - my $routput_block_type = []; # types of code block - my $routput_container_type = []; # paren types, such as if, elsif, .. - my $routput_type_sequence = []; # nesting sequential number - my $routput_indent_flag = []; # - - # TV3: SCALARS for quote variables. These are initialized with a - # subroutine call and continually updated as lines are processed. - my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); - - # TV4: SCALARS for multi-line identifiers and - # statements. These are initialized with a subroutine call - # and continually updated as lines are processed. - my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); - - # TV5: SCALARS for tracking indentation level. - # Initialized once and continually updated as lines are - # processed. - my ( - $nesting_token_string, $nesting_type_string, - $nesting_block_string, $nesting_block_flag, - $nesting_list_string, $nesting_list_flag, - $ci_string_in_tokenizer, $continuation_string_in_tokenizer, - $in_statement_continuation, $level_in_tokenizer, - $slevel_in_tokenizer, $rslevel_stack, - ); - - # TV6: SCALARS for remembering several previous - # tokens. Initialized once and continually updated as - # lines are processed. - my ( - $last_nonblank_container_type, $last_nonblank_type_sequence, - $last_last_nonblank_token, $last_last_nonblank_type, - $last_last_nonblank_block_type, $last_last_nonblank_container_type, - $last_last_nonblank_type_sequence, $last_nonblank_prototype, - ); - - # ---------------------------------------------------------------- - # beginning of tokenizer variable access and manipulation routines - # ---------------------------------------------------------------- - - sub initialize_tokenizer_state { - - # TV1: initialized on each call - # TV2: initialized on each call - # TV3: - $in_quote = 0; - $quote_type = 'Q'; - $quote_character = ""; - $quote_pos = 0; - $quote_depth = 0; - $quoted_string_1 = ""; - $quoted_string_2 = ""; - $allowed_quote_modifiers = ""; - - # TV4: - $id_scan_state = ''; - $identifier = ''; - $want_paren = ""; - $indented_if_level = 0; - - # TV5: - $nesting_token_string = ""; - $nesting_type_string = ""; - $nesting_block_string = '1'; # initially in a block - $nesting_block_flag = 1; - $nesting_list_string = '0'; # initially not in a list - $nesting_list_flag = 0; # initially not in a list - $ci_string_in_tokenizer = ""; - $continuation_string_in_tokenizer = "0"; - $in_statement_continuation = 0; - $level_in_tokenizer = 0; - $slevel_in_tokenizer = 0; - $rslevel_stack = []; - - # TV6: - $last_nonblank_container_type = ''; - $last_nonblank_type_sequence = ''; - $last_last_nonblank_token = ';'; - $last_last_nonblank_type = ';'; - $last_last_nonblank_block_type = ''; - $last_last_nonblank_container_type = ''; - $last_last_nonblank_type_sequence = ''; - $last_nonblank_prototype = ""; - } - - sub save_tokenizer_state { - - my $rTV1 = [ - $block_type, $container_type, $expecting, - $i, $i_tok, $input_line, - $input_line_number, $last_nonblank_i, $max_token_index, - $next_tok, $next_type, $peeked_ahead, - $prototype, $rhere_target_list, $rtoken_map, - $rtoken_type, $rtokens, $tok, - $type, $type_sequence, $indent_flag, - ]; - - my $rTV2 = [ - $routput_token_list, $routput_token_type, - $routput_block_type, $routput_container_type, - $routput_type_sequence, $routput_indent_flag, - ]; - - my $rTV3 = [ - $in_quote, $quote_type, - $quote_character, $quote_pos, - $quote_depth, $quoted_string_1, - $quoted_string_2, $allowed_quote_modifiers, - ]; - - my $rTV4 = - [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; - - my $rTV5 = [ - $nesting_token_string, $nesting_type_string, - $nesting_block_string, $nesting_block_flag, - $nesting_list_string, $nesting_list_flag, - $ci_string_in_tokenizer, $continuation_string_in_tokenizer, - $in_statement_continuation, $level_in_tokenizer, - $slevel_in_tokenizer, $rslevel_stack, - ]; - - my $rTV6 = [ - $last_nonblank_container_type, - $last_nonblank_type_sequence, - $last_last_nonblank_token, - $last_last_nonblank_type, - $last_last_nonblank_block_type, - $last_last_nonblank_container_type, - $last_last_nonblank_type_sequence, - $last_nonblank_prototype, - ]; - return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; - } - - sub restore_tokenizer_state { - my ($rstate) = @_; - my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; - ( - $block_type, $container_type, $expecting, - $i, $i_tok, $input_line, - $input_line_number, $last_nonblank_i, $max_token_index, - $next_tok, $next_type, $peeked_ahead, - $prototype, $rhere_target_list, $rtoken_map, - $rtoken_type, $rtokens, $tok, - $type, $type_sequence, $indent_flag, - ) = @{$rTV1}; - - ( - $routput_token_list, $routput_token_type, - $routput_block_type, $routput_container_type, - $routput_type_sequence, $routput_type_sequence, - ) = @{$rTV2}; - - ( - $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, - ) = @{$rTV3}; - - ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = - @{$rTV4}; - - ( - $nesting_token_string, $nesting_type_string, - $nesting_block_string, $nesting_block_flag, - $nesting_list_string, $nesting_list_flag, - $ci_string_in_tokenizer, $continuation_string_in_tokenizer, - $in_statement_continuation, $level_in_tokenizer, - $slevel_in_tokenizer, $rslevel_stack, - ) = @{$rTV5}; - - ( - $last_nonblank_container_type, - $last_nonblank_type_sequence, - $last_last_nonblank_token, - $last_last_nonblank_type, - $last_last_nonblank_block_type, - $last_last_nonblank_container_type, - $last_last_nonblank_type_sequence, - $last_nonblank_prototype, - ) = @{$rTV6}; - } - - sub get_indentation_level { - - # patch to avoid reporting error if indented if is not terminated - if ($indented_if_level) { return $level_in_tokenizer - 1 } - return $level_in_tokenizer; - } - - sub reset_indentation_level { - $level_in_tokenizer = $_[0]; - $slevel_in_tokenizer = $_[0]; - push @{$rslevel_stack}, $slevel_in_tokenizer; - } - - sub peeked_ahead { - $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead; - } - - # ------------------------------------------------------------ - # end of tokenizer variable access and manipulation routines - # ------------------------------------------------------------ - - # ------------------------------------------------------------ - # beginning of various scanner interface routines - # ------------------------------------------------------------ - sub scan_replacement_text { - - # check for here-docs in replacement text invoked by - # a substitution operator with executable modifier 'e'. - # - # given: - # $replacement_text - # return: - # $rht = reference to any here-doc targets - my ($replacement_text) = @_; - - # quick check - return undef unless ( $replacement_text =~ /<{_logger_object}; - - # localize all package variables - local ( - $tokenizer_self, $last_nonblank_token, - $last_nonblank_type, $last_nonblank_block_type, - $statement_type, $in_attribute_list, - $current_package, $context, - %is_constant, %is_user_function, - %user_function_prototype, %is_block_function, - %is_block_list_function, %saw_function_definition, - $brace_depth, $paren_depth, - $square_bracket_depth, @current_depth, - @total_depth, $total_depth, - @nesting_sequence_number, @current_sequence_number, - @paren_type, @paren_semicolon_count, - @paren_structural_type, @brace_type, - @brace_structural_type, @brace_statement_type, - @brace_context, @brace_package, - @square_bracket_type, @square_bracket_structural_type, - @depth_array, @starting_line_of_current_depth, - @nested_ternary_flag, - ); - - # save all lexical variables - my $rstate = save_tokenizer_state(); - _decrement_count(); # avoid error check for multiple tokenizers - - # make a new tokenizer - my $rOpts = {}; - my $rpending_logfile_message; - my $source_object = - Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, - $rpending_logfile_message ); - my $tokenizer = Perl::Tidy::Tokenizer->new( - source_object => $source_object, - logger_object => $logger_object, - starting_line_number => $input_line_number, - ); - - # scan the replacement text - 1 while ( $tokenizer->get_line() ); - - # remove any here doc targets - my $rht = undef; - if ( $tokenizer_self->{_in_here_doc} ) { - $rht = []; - push @{$rht}, - [ - $tokenizer_self->{_here_doc_target}, - $tokenizer_self->{_here_quote_character} - ]; - if ( $tokenizer_self->{_rhere_target_list} ) { - push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; - $tokenizer_self->{_rhere_target_list} = undef; - } - $tokenizer_self->{_in_here_doc} = undef; - } - - # now its safe to report errors - $tokenizer->report_tokenization_errors(); - - # restore all tokenizer lexical variables - restore_tokenizer_state($rstate); - - # return the here doc targets - return $rht; - } - - sub scan_bare_identifier { - ( $i, $tok, $type, $prototype ) = - scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, - $rtoken_map, $max_token_index ); - } - - sub scan_identifier { - ( $i, $tok, $type, $id_scan_state, $identifier ) = - scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, - $max_token_index, $expecting ); - } - - sub scan_id { - ( $i, $tok, $type, $id_scan_state ) = - scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, - $id_scan_state, $max_token_index ); - } - - sub scan_number { - my $number; - ( $i, $type, $number ) = - scan_number_do( $input_line, $i, $rtoken_map, $type, - $max_token_index ); - return $number; - } - - # a sub to warn if token found where term expected - sub error_if_expecting_TERM { - if ( $expecting == TERM ) { - if ( $really_want_term{$last_nonblank_type} ) { - unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map, - $rtoken_type, $input_line ); - 1; - } - } - } - - # a sub to warn if token found where operator expected - sub error_if_expecting_OPERATOR { - if ( $expecting == OPERATOR ) { - my $thing = defined $_[0] ? $_[0] : $tok; - unexpected( $thing, "operator", $i_tok, $last_nonblank_i, - $rtoken_map, $rtoken_type, $input_line ); - if ( $i_tok == 0 ) { - interrupt_logfile(); - warning("Missing ';' above?\n"); - resume_logfile(); - } - 1; - } - } - - # ------------------------------------------------------------ - # end scanner interfaces - # ------------------------------------------------------------ - - my %is_for_foreach; - @_ = qw(for foreach); - @is_for_foreach{@_} = (1) x scalar(@_); - - my %is_my_our; - @_ = qw(my our); - @is_my_our{@_} = (1) x scalar(@_); - - # These keywords may introduce blocks after parenthesized expressions, - # in the form: - # keyword ( .... ) { BLOCK } - # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' - my %is_blocktype_with_paren; - @_ = qw(if elsif unless while until for foreach switch case given when); - @is_blocktype_with_paren{@_} = (1) x scalar(@_); - - # ------------------------------------------------------------ - # begin hash of code for handling most token types - # ------------------------------------------------------------ - my $tokenization_code = { - - # no special code for these types yet, but syntax checks - # could be added - -## '!' => undef, -## '!=' => undef, -## '!~' => undef, -## '%=' => undef, -## '&&=' => undef, -## '&=' => undef, -## '+=' => undef, -## '-=' => undef, -## '..' => undef, -## '..' => undef, -## '...' => undef, -## '.=' => undef, -## '<<=' => undef, -## '<=' => undef, -## '<=>' => undef, -## '<>' => undef, -## '=' => undef, -## '==' => undef, -## '=~' => undef, -## '>=' => undef, -## '>>' => undef, -## '>>=' => undef, -## '\\' => undef, -## '^=' => undef, -## '|=' => undef, -## '||=' => undef, -## '//=' => undef, -## '~' => undef, -## '~~' => undef, -## '!~~' => undef, - - '>' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - '|' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - '$' => sub { - - # start looking for a scalar - error_if_expecting_OPERATOR("Scalar") - if ( $expecting == OPERATOR ); - scan_identifier(); - - if ( $identifier eq '$^W' ) { - $tokenizer_self->{_saw_perl_dash_w} = 1; - } - - # Check for indentifier in indirect object slot - # (vorboard.pl, sort.t). Something like: - # /^(print|printf|sort|exec|system)$/ - if ( - $is_indirect_object_taker{$last_nonblank_token} - - || ( ( $last_nonblank_token eq '(' ) - && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) - || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object - ) - { - $type = 'Z'; - } - }, - '(' => sub { - - ++$paren_depth; - $paren_semicolon_count[$paren_depth] = 0; - if ($want_paren) { - $container_type = $want_paren; - $want_paren = ""; - } - else { - $container_type = $last_nonblank_token; - - # We can check for a syntax error here of unexpected '(', - # but this is going to get messy... - if ( - $expecting == OPERATOR - - # be sure this is not a method call of the form - # &method(...), $method->(..), &{method}(...), - # $ref[2](list) is ok & short for $ref[2]->(list) - # NOTE: at present, braces in something like &{ xxx } - # are not marked as a block, we might have a method call - && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ - - ) - { - - # ref: camel 3 p 703. - if ( $last_last_nonblank_token eq 'do' ) { - complain( -"do SUBROUTINE is deprecated; consider & or -> notation\n" - ); - } - else { - - # if this is an empty list, (), then it is not an - # error; for example, we might have a constant pi and - # invoke it with pi() or just pi; - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, - $max_token_index ); - if ( $next_nonblank_token ne ')' ) { - my $hint; - error_if_expecting_OPERATOR('('); - - if ( $last_nonblank_type eq 'C' ) { - $hint = - "$last_nonblank_token has a void prototype\n"; - } - elsif ( $last_nonblank_type eq 'i' ) { - if ( $i_tok > 0 - && $last_nonblank_token =~ /^\$/ ) - { - $hint = -"Do you mean '$last_nonblank_token->(' ?\n"; - } - } - if ($hint) { - interrupt_logfile(); - warning($hint); - resume_logfile(); - } - } ## end if ( $next_nonblank_token... - } ## end else [ if ( $last_last_nonblank_token... - } ## end if ( $expecting == OPERATOR... - } - $paren_type[$paren_depth] = $container_type; - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); - - # propagate types down through nested parens - # for example: the second paren in 'if ((' would be structural - # since the first is. - - if ( $last_nonblank_token eq '(' ) { - $type = $last_nonblank_type; - } - - # We exclude parens as structural after a ',' because it - # causes subtle problems with continuation indentation for - # something like this, where the first 'or' will not get - # indented. - # - # assert( - # __LINE__, - # ( not defined $check ) - # or ref $check - # or $check eq "new" - # or $check eq "old", - # ); - # - # Likewise, we exclude parens where a statement can start - # because of problems with continuation indentation, like - # these: - # - # ($firstline =~ /^#\!.*perl/) - # and (print $File::Find::name, "\n") - # and (return 1); - # - # (ref($usage_fref) =~ /CODE/) - # ? &$usage_fref - # : (&blast_usage, &blast_params, &blast_general_params); - - else { - $type = '{'; - } - - if ( $last_nonblank_type eq ')' ) { - warning( - "Syntax error? found token '$last_nonblank_type' then '('\n" - ); - } - $paren_structural_type[$paren_depth] = $type; - - }, - ')' => sub { - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); - - if ( $paren_structural_type[$paren_depth] eq '{' ) { - $type = '}'; - } - - $container_type = $paren_type[$paren_depth]; - - # /^(for|foreach)$/ - if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { - my $num_sc = $paren_semicolon_count[$paren_depth]; - if ( $num_sc > 0 && $num_sc != 2 ) { - warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); - } - } - - if ( $paren_depth > 0 ) { $paren_depth-- } - }, - ',' => sub { - if ( $last_nonblank_type eq ',' ) { - complain("Repeated ','s \n"); - } - - # patch for operator_expected: note if we are in the list (use.t) - if ( $statement_type eq 'use' ) { $statement_type = '_use' } -## FIXME: need to move this elsewhere, perhaps check after a '(' -## elsif ($last_nonblank_token eq '(') { -## warning("Leading ','s illegal in some versions of perl\n"); -## } - }, - ';' => sub { - $context = UNKNOWN_CONTEXT; - $statement_type = ''; - - # /^(for|foreach)$/ - if ( $is_for_foreach{ $paren_type[$paren_depth] } ) - { # mark ; in for loop - - # Be careful: we do not want a semicolon such as the - # following to be included: - # - # for (sort {strcoll($a,$b);} keys %investments) { - - if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] - && $square_bracket_depth == - $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) - { - - $type = 'f'; - $paren_semicolon_count[$paren_depth]++; - } - } - - }, - '"' => sub { - error_if_expecting_OPERATOR("String") - if ( $expecting == OPERATOR ); - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = ""; - }, - "'" => sub { - error_if_expecting_OPERATOR("String") - if ( $expecting == OPERATOR ); - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = ""; - }, - '`' => sub { - error_if_expecting_OPERATOR("String") - if ( $expecting == OPERATOR ); - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = ""; - }, - '/' => sub { - my $is_pattern; - - if ( $expecting == UNKNOWN ) { # indeterminte, must guess.. - my $msg; - ( $is_pattern, $msg ) = - guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($msg) { - write_diagnostics("DIVIDE:$msg\n"); - write_logfile_entry($msg); - } - } - else { $is_pattern = ( $expecting == TERM ) } - - if ($is_pattern) { - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = '[cgimosxp]'; - } - else { # not a pattern; check for a /= token - - if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= - $i++; - $tok = '/='; - $type = $tok; - } - - #DEBUG - collecting info on what tokens follow a divide - # for development of guessing algorithm - #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { - # #write_diagnostics( "DIVIDE? $input_line\n" ); - #} - } - }, - '{' => sub { - - # if we just saw a ')', we will label this block with - # its type. We need to do this to allow sub - # code_block_type to determine if this brace starts a - # code block or anonymous hash. (The type of a paren - # pair is the preceding token, such as 'if', 'else', - # etc). - $container_type = ""; - - # ATTRS: for a '{' following an attribute list, reset - # things to look like we just saw the sub name - if ( $statement_type =~ /^sub/ ) { - $last_nonblank_token = $statement_type; - $last_nonblank_type = 'i'; - $statement_type = ""; - } - - # patch for SWITCH/CASE: hide these keywords from an immediately - # following opening brace - elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) - && $statement_type eq $last_nonblank_token ) - { - $last_nonblank_token = ";"; - } - - elsif ( $last_nonblank_token eq ')' ) { - $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; - - # defensive move in case of a nesting error (pbug.t) - # in which this ')' had no previous '(' - # this nesting error will have been caught - if ( !defined($last_nonblank_token) ) { - $last_nonblank_token = 'if'; - } - - # check for syntax error here; - unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { - my $list = join( ' ', sort keys %is_blocktype_with_paren ); - warning( - "syntax error at ') {', didn't see one of: $list\n"); - } - } - - # patch for paren-less for/foreach glitch, part 2. - # see note below under 'qw' - elsif ($last_nonblank_token eq 'qw' - && $is_for_foreach{$want_paren} ) - { - $last_nonblank_token = $want_paren; - if ( $last_last_nonblank_token eq $want_paren ) { - warning( -"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" - ); - - } - $want_paren = ""; - } - - # now identify which of the three possible types of - # curly braces we have: hash index container, anonymous - # hash reference, or code block. - - # non-structural (hash index) curly brace pair - # get marked 'L' and 'R' - if ( is_non_structural_brace() ) { - $type = 'L'; - - # patch for SWITCH/CASE: - # allow paren-less identifier after 'when' - # if the brace is preceded by a space - if ( $statement_type eq 'when' - && $last_nonblank_type eq 'i' - && $last_last_nonblank_type eq 'k' - && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) - { - $type = '{'; - $block_type = $statement_type; - } - } - - # code and anonymous hash have the same type, '{', but are - # distinguished by 'block_type', - # which will be blank for an anonymous hash - else { - - $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, - $max_token_index ); - - # patch to promote bareword type to function taking block - if ( $block_type - && $last_nonblank_type eq 'w' - && $last_nonblank_i >= 0 ) - { - if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { - $routput_token_type->[$last_nonblank_i] = 'G'; - } - } - - # patch for SWITCH/CASE: if we find a stray opening block brace - # where we might accept a 'case' or 'when' block, then take it - if ( $statement_type eq 'case' - || $statement_type eq 'when' ) - { - if ( !$block_type || $block_type eq '}' ) { - $block_type = $statement_type; - } - } - } - $brace_type[ ++$brace_depth ] = $block_type; - $brace_package[$brace_depth] = $current_package; - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); - $brace_structural_type[$brace_depth] = $type; - $brace_context[$brace_depth] = $context; - $brace_statement_type[$brace_depth] = $statement_type; - }, - '}' => sub { - $block_type = $brace_type[$brace_depth]; - if ($block_type) { $statement_type = '' } - if ( defined( $brace_package[$brace_depth] ) ) { - $current_package = $brace_package[$brace_depth]; - } - - # can happen on brace error (caught elsewhere) - else { - } - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); - - if ( $brace_structural_type[$brace_depth] eq 'L' ) { - $type = 'R'; - } - - # propagate type information for 'do' and 'eval' blocks. - # This is necessary to enable us to know if an operator - # or term is expected next - if ( $is_block_operator{ $brace_type[$brace_depth] } ) { - $tok = $brace_type[$brace_depth]; - } - - $context = $brace_context[$brace_depth]; - $statement_type = $brace_statement_type[$brace_depth]; - if ( $brace_depth > 0 ) { $brace_depth--; } - }, - '&' => sub { # maybe sub call? start looking - - # We have to check for sub call unless we are sure we - # are expecting an operator. This example from s2p - # got mistaken as a q operator in an early version: - # print BODY &q(<<'EOT'); - if ( $expecting != OPERATOR ) { - scan_identifier(); - } - else { - } - }, - '<' => sub { # angle operator or less than? - - if ( $expecting != OPERATOR ) { - ( $i, $type ) = - find_angle_operator_termination( $input_line, $i, $rtoken_map, - $expecting, $max_token_index ); - - } - else { - } - }, - '?' => sub { # ?: conditional or starting pattern? - - my $is_pattern; - - if ( $expecting == UNKNOWN ) { - - my $msg; - ( $is_pattern, $msg ) = - guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($msg) { write_logfile_entry($msg) } - } - else { $is_pattern = ( $expecting == TERM ) } - - if ($is_pattern) { - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = '[cgimosxp]'; - } - else { - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( QUESTION_COLON, - $$rtoken_map[$i_tok] ); - } - }, - '*' => sub { # typeglob, or multiply? - - if ( $expecting == TERM ) { - scan_identifier(); - } - else { - - if ( $$rtokens[ $i + 1 ] eq '=' ) { - $tok = '*='; - $type = $tok; - $i++; - } - elsif ( $$rtokens[ $i + 1 ] eq '*' ) { - $tok = '**'; - $type = $tok; - $i++; - if ( $$rtokens[ $i + 1 ] eq '=' ) { - $tok = '**='; - $type = $tok; - $i++; - } - } - } - }, - '.' => sub { # what kind of . ? - - if ( $expecting != OPERATOR ) { - scan_number(); - if ( $type eq '.' ) { - error_if_expecting_TERM() - if ( $expecting == TERM ); - } - } - else { - } - }, - ':' => sub { - - # if this is the first nonblank character, call it a label - # since perl seems to just swallow it - if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { - $type = 'J'; - } - - # ATTRS: check for a ':' which introduces an attribute list - # (this might eventually get its own token type) - elsif ( $statement_type =~ /^sub/ ) { - $type = 'A'; - $in_attribute_list = 1; - } - - # check for scalar attribute, such as - # my $foo : shared = 1; - elsif ($is_my_our{$statement_type} - && $current_depth[QUESTION_COLON] == 0 ) - { - $type = 'A'; - $in_attribute_list = 1; - } - - # otherwise, it should be part of a ?/: operator - else { - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( QUESTION_COLON, - $$rtoken_map[$i_tok] ); - if ( $last_nonblank_token eq '?' ) { - warning("Syntax error near ? :\n"); - } - } - }, - '+' => sub { # what kind of plus? - - if ( $expecting == TERM ) { - my $number = scan_number(); - - # unary plus is safest assumption if not a number - if ( !defined($number) ) { $type = 'p'; } - } - elsif ( $expecting == OPERATOR ) { - } - else { - if ( $next_type eq 'w' ) { $type = 'p' } - } - }, - '@' => sub { - - error_if_expecting_OPERATOR("Array") - if ( $expecting == OPERATOR ); - scan_identifier(); - }, - '%' => sub { # hash or modulo? - - # first guess is hash if no following blank - if ( $expecting == UNKNOWN ) { - if ( $next_type ne 'b' ) { $expecting = TERM } - } - if ( $expecting == TERM ) { - scan_identifier(); - } - }, - '[' => sub { - $square_bracket_type[ ++$square_bracket_depth ] = - $last_nonblank_token; - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); - - # It may seem odd, but structural square brackets have - # type '{' and '}'. This simplifies the indentation logic. - if ( !is_non_structural_brace() ) { - $type = '{'; - } - $square_bracket_structural_type[$square_bracket_depth] = $type; - }, - ']' => sub { - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); - - if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) - { - $type = '}'; - } - if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } - }, - '-' => sub { # what kind of minus? - - if ( ( $expecting != OPERATOR ) - && $is_file_test_operator{$next_tok} ) - { - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i + 1, $rtokens, - $max_token_index ); - - # check for a quoted word like "-w=>xx"; - # it is sufficient to just check for a following '=' - if ( $next_nonblank_token eq '=' ) { - $type = 'm'; - } - else { - $i++; - $tok .= $next_tok; - $type = 'F'; - } - } - elsif ( $expecting == TERM ) { - my $number = scan_number(); - - # maybe part of bareword token? unary is safest - if ( !defined($number) ) { $type = 'm'; } - - } - elsif ( $expecting == OPERATOR ) { - } - else { - - if ( $next_type eq 'w' ) { - $type = 'm'; - } - } - }, - - '^' => sub { - - # check for special variables like ${^WARNING_BITS} - if ( $expecting == TERM ) { - - # FIXME: this should work but will not catch errors - # because we also have to be sure that previous token is - # a type character ($,@,%). - if ( $last_nonblank_token eq '{' - && ( $next_tok =~ /^[A-Za-z_]/ ) ) - { - - if ( $next_tok eq 'W' ) { - $tokenizer_self->{_saw_perl_dash_w} = 1; - } - $tok = $tok . $next_tok; - $i = $i + 1; - $type = 'w'; - } - - else { - unless ( error_if_expecting_TERM() ) { - - # Something like this is valid but strange: - # undef ^I; - complain("The '^' seems unusual here\n"); - } - } - } - }, - - '::' => sub { # probably a sub call - scan_bare_identifier(); - }, - '<<' => sub { # maybe a here-doc? - return - unless ( $i < $max_token_index ) - ; # here-doc not possible if end of line - - if ( $expecting != OPERATOR ) { - my ( $found_target, $here_doc_target, $here_quote_character, - $saw_error ); - ( - $found_target, $here_doc_target, $here_quote_character, $i, - $saw_error - ) - = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($found_target) { - push @{$rhere_target_list}, - [ $here_doc_target, $here_quote_character ]; - $type = 'h'; - if ( length($here_doc_target) > 80 ) { - my $truncated = substr( $here_doc_target, 0, 80 ); - complain("Long here-target: '$truncated' ...\n"); - } - elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { - complain( - "Unconventional here-target: '$here_doc_target'\n" - ); - } - } - elsif ( $expecting == TERM ) { - unless ($saw_error) { - - # shouldn't happen.. - warning("Program bug; didn't find here doc target\n"); - report_definite_bug(); - } - } - } - else { - } - }, - '->' => sub { - - # if -> points to a bare word, we must scan for an identifier, - # otherwise something like ->y would look like the y operator - scan_identifier(); - }, - - # type = 'pp' for pre-increment, '++' for post-increment - '++' => sub { - if ( $expecting == TERM ) { $type = 'pp' } - elsif ( $expecting == UNKNOWN ) { - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token eq '$' ) { $type = 'pp' } - } - }, - - '=>' => sub { - if ( $last_nonblank_type eq $tok ) { - complain("Repeated '=>'s \n"); - } - - # patch for operator_expected: note if we are in the list (use.t) - # TODO: make version numbers a new token type - if ( $statement_type eq 'use' ) { $statement_type = '_use' } - }, - - # type = 'mm' for pre-decrement, '--' for post-decrement - '--' => sub { - - if ( $expecting == TERM ) { $type = 'mm' } - elsif ( $expecting == UNKNOWN ) { - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token eq '$' ) { $type = 'mm' } - } - }, - - '&&' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - - '||' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - - '//' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - }; - - # ------------------------------------------------------------ - # end hash of code for handling individual token types - # ------------------------------------------------------------ - - my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); - - # These block types terminate statements and do not need a trailing - # semicolon - # patched for SWITCH/CASE: - my %is_zero_continuation_block_type; - @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; - if elsif else unless while until for foreach switch case given when); - @is_zero_continuation_block_type{@_} = (1) x scalar(@_); - - my %is_not_zero_continuation_block_type; - @_ = qw(sort grep map do eval); - @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); - - my %is_logical_container; - @_ = qw(if elsif unless while and or err not && ! || for foreach); - @is_logical_container{@_} = (1) x scalar(@_); - - my %is_binary_type; - @_ = qw(|| &&); - @is_binary_type{@_} = (1) x scalar(@_); - - my %is_binary_keyword; - @_ = qw(and or err eq ne cmp); - @is_binary_keyword{@_} = (1) x scalar(@_); - - # 'L' is token for opening { at hash key - my %is_opening_type; - @_ = qw" L { ( [ "; - @is_opening_type{@_} = (1) x scalar(@_); - - # 'R' is token for closing } at hash key - my %is_closing_type; - @_ = qw" R } ) ] "; - @is_closing_type{@_} = (1) x scalar(@_); - - my %is_redo_last_next_goto; - @_ = qw(redo last next goto); - @is_redo_last_next_goto{@_} = (1) x scalar(@_); - - my %is_use_require; - @_ = qw(use require); - @is_use_require{@_} = (1) x scalar(@_); - - my %is_sub_package; - @_ = qw(sub package); - @is_sub_package{@_} = (1) x scalar(@_); - - # This hash holds the hash key in $tokenizer_self for these keywords: - my %is_format_END_DATA = ( - 'format' => '_in_format', - '__END__' => '_in_end', - '__DATA__' => '_in_data', - ); - - # ref: camel 3 p 147, - # but perl may accept undocumented flags - # perl 5.10 adds 'p' (preserve) - my %quote_modifiers = ( - 's' => '[cegimosxp]', - 'y' => '[cds]', - 'tr' => '[cds]', - 'm' => '[cgimosxp]', - 'qr' => '[imosxp]', - 'q' => "", - 'qq' => "", - 'qw' => "", - 'qx' => "", - ); - - # table showing how many quoted things to look for after quote operator.. - # s, y, tr have 2 (pattern and replacement) - # others have 1 (pattern only) - my %quote_items = ( - 's' => 2, - 'y' => 2, - 'tr' => 2, - 'm' => 1, - 'qr' => 1, - 'q' => 1, - 'qq' => 1, - 'qw' => 1, - 'qx' => 1, - ); - - sub tokenize_this_line { - - # This routine breaks a line of perl code into tokens which are of use in - # indentation and reformatting. One of my goals has been to define tokens - # such that a newline may be inserted between any pair of tokens without - # changing or invalidating the program. This version comes close to this, - # although there are necessarily a few exceptions which must be caught by - # the formatter. Many of these involve the treatment of bare words. - # - # The tokens and their types are returned in arrays. See previous - # routine for their names. - # - # See also the array "valid_token_types" in the BEGIN section for an - # up-to-date list. - # - # To simplify things, token types are either a single character, or they - # are identical to the tokens themselves. - # - # As a debugging aid, the -D flag creates a file containing a side-by-side - # comparison of the input string and its tokenization for each line of a file. - # This is an invaluable debugging aid. - # - # In addition to tokens, and some associated quantities, the tokenizer - # also returns flags indication any special line types. These include - # quotes, here_docs, formats. - # - # ----------------------------------------------------------------------- - # - # How to add NEW_TOKENS: - # - # New token types will undoubtedly be needed in the future both to keep up - # with changes in perl and to help adapt the tokenizer to other applications. - # - # Here are some notes on the minimal steps. I wrote these notes while - # adding the 'v' token type for v-strings, which are things like version - # numbers 5.6.0, and ip addresses, and will use that as an example. ( You - # can use your editor to search for the string "NEW_TOKENS" to find the - # appropriate sections to change): - # - # *. Try to talk somebody else into doing it! If not, .. - # - # *. Make a backup of your current version in case things don't work out! - # - # *. Think of a new, unused character for the token type, and add to - # the array @valid_token_types in the BEGIN section of this package. - # For example, I used 'v' for v-strings. - # - # *. Implement coding to recognize the $type of the token in this routine. - # This is the hardest part, and is best done by immitating or modifying - # some of the existing coding. For example, to recognize v-strings, I - # patched 'sub scan_bare_identifier' to recognize v-strings beginning with - # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. - # - # *. Update sub operator_expected. This update is critically important but - # the coding is trivial. Look at the comments in that routine for help. - # For v-strings, which should behave like numbers, I just added 'v' to the - # regex used to handle numbers and strings (types 'n' and 'Q'). - # - # *. Implement a 'bond strength' rule in sub set_bond_strengths in - # Perl::Tidy::Formatter for breaking lines around this token type. You can - # skip this step and take the default at first, then adjust later to get - # desired results. For adding type 'v', I looked at sub bond_strength and - # saw that number type 'n' was using default strengths, so I didn't do - # anything. I may tune it up someday if I don't like the way line - # breaks with v-strings look. - # - # *. Implement a 'whitespace' rule in sub set_white_space_flag in - # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine - # and saw that type 'n' used spaces on both sides, so I just added 'v' - # to the array @spaces_both_sides. - # - # *. Update HtmlWriter package so that users can colorize the token as - # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in - # that package. For v-strings, I initially chose to use a default color - # equal to the default for numbers, but it might be nice to change that - # eventually. - # - # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. - # - # *. Run lots and lots of debug tests. Start with special files designed - # to test the new token type. Run with the -D flag to create a .DEBUG - # file which shows the tokenization. When these work ok, test as many old - # scripts as possible. Start with all of the '.t' files in the 'test' - # directory of the distribution file. Compare .tdy output with previous - # version and updated version to see the differences. Then include as - # many more files as possible. My own technique has been to collect a huge - # number of perl scripts (thousands!) into one directory and run perltidy - # *, then run diff between the output of the previous version and the - # current version. - # - # *. For another example, search for the smartmatch operator '~~' - # with your editor to see where updates were made for it. - # - # ----------------------------------------------------------------------- - - my $line_of_tokens = shift; - my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; - - # patch while coding change is underway - # make callers private data to allow access - # $tokenizer_self = $caller_tokenizer_self; - - # extract line number for use in error messages - $input_line_number = $line_of_tokens->{_line_number}; - - # reinitialize for multi-line quote - $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; - - # check for pod documentation - if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { - - # must not be in multi-line quote - # and must not be in an eqn - if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) - { - $tokenizer_self->{_in_pod} = 1; - return; - } - } - - $input_line = $untrimmed_input_line; - - chomp $input_line; - - # trim start of this line unless we are continuing a quoted line - # do not trim end because we might end in a quote (test: deken4.pl) - # Perl::Tidy::Formatter will delete needless trailing blanks - unless ( $in_quote && ( $quote_type eq 'Q' ) ) { - $input_line =~ s/^\s*//; # trim left end - } - - # update the copy of the line for use in error messages - # This must be exactly what we give the pre_tokenizer - $tokenizer_self->{_line_text} = $input_line; - - # re-initialize for the main loop - $routput_token_list = []; # stack of output token indexes - $routput_token_type = []; # token types - $routput_block_type = []; # types of code block - $routput_container_type = []; # paren types, such as if, elsif, .. - $routput_type_sequence = []; # nesting sequential number - - $rhere_target_list = []; - - $tok = $last_nonblank_token; - $type = $last_nonblank_type; - $prototype = $last_nonblank_prototype; - $last_nonblank_i = -1; - $block_type = $last_nonblank_block_type; - $container_type = $last_nonblank_container_type; - $type_sequence = $last_nonblank_type_sequence; - $indent_flag = 0; - $peeked_ahead = 0; - - # tokenization is done in two stages.. - # stage 1 is a very simple pre-tokenization - my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens - - # a little optimization for a full-line comment - if ( !$in_quote && ( $input_line =~ /^#/ ) ) { - $max_tokens_wanted = 1 # no use tokenizing a comment - } - - # start by breaking the line into pre-tokens - ( $rtokens, $rtoken_map, $rtoken_type ) = - pre_tokenize( $input_line, $max_tokens_wanted ); - - $max_token_index = scalar(@$rtokens) - 1; - push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic - push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced - push( @$rtoken_type, 'b', 'b', 'b' ); - - # initialize for main loop - for $i ( 0 .. $max_token_index + 3 ) { - $routput_token_type->[$i] = ""; - $routput_block_type->[$i] = ""; - $routput_container_type->[$i] = ""; - $routput_type_sequence->[$i] = ""; - $routput_indent_flag->[$i] = 0; - } - $i = -1; - $i_tok = -1; - - # ------------------------------------------------------------ - # begin main tokenization loop - # ------------------------------------------------------------ - - # we are looking at each pre-token of one line and combining them - # into tokens - while ( ++$i <= $max_token_index ) { - - if ($in_quote) { # continue looking for end of a quote - $type = $quote_type; - - unless ( @{$routput_token_list} ) - { # initialize if continuation line - push( @{$routput_token_list}, $i ); - $routput_token_type->[$i] = $type; - - } - $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); - - # scan for the end of the quote or pattern - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2 - ) - = do_quote( - $i, $in_quote, $quote_character, - $quote_pos, $quote_depth, $quoted_string_1, - $quoted_string_2, $rtokens, $rtoken_map, - $max_token_index - ); - - # all done if we didn't find it - last if ($in_quote); - - # save pattern and replacement text for rescanning - my $qs1 = $quoted_string_1; - my $qs2 = $quoted_string_2; - - # re-initialize for next search - $quote_character = ''; - $quote_pos = 0; - $quote_type = 'Q'; - $quoted_string_1 = ""; - $quoted_string_2 = ""; - last if ( ++$i > $max_token_index ); - - # look for any modifiers - if ($allowed_quote_modifiers) { - - # check for exact quote modifiers - if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { - my $str = $$rtokens[$i]; - my $saw_modifier_e; - while ( $str =~ /\G$allowed_quote_modifiers/gc ) { - my $pos = pos($str); - my $char = substr( $str, $pos - 1, 1 ); - $saw_modifier_e ||= ( $char eq 'e' ); - } - - # For an 'e' quote modifier we must scan the replacement - # text for here-doc targets. - if ($saw_modifier_e) { - - my $rht = scan_replacement_text($qs1); - - # Change type from 'Q' to 'h' for quotes with - # here-doc targets so that the formatter (see sub - # print_line_of_tokens) will not make any line - # breaks after this point. - if ($rht) { - push @{$rhere_target_list}, @{$rht}; - $type = 'h'; - if ( $i_tok < 0 ) { - my $ilast = $routput_token_list->[-1]; - $routput_token_type->[$ilast] = $type; - } - } - } - - if ( defined( pos($str) ) ) { - - # matched - if ( pos($str) == length($str) ) { - last if ( ++$i > $max_token_index ); - } - - # Looks like a joined quote modifier - # and keyword, maybe something like - # s/xxx/yyy/gefor @k=... - # Example is "galgen.pl". Would have to split - # the word and insert a new token in the - # pre-token list. This is so rare that I haven't - # done it. Will just issue a warning citation. - - # This error might also be triggered if my quote - # modifier characters are incomplete - else { - warning(< $max_token_index ); - } - } - else { - - # example file: rokicki4.pl - # This error might also be triggered if my quote - # modifier characters are incomplete - write_logfile_entry( -"Note: found word $str at quote modifier location\n" - ); - } - } - - # re-initialize - $allowed_quote_modifiers = ""; - } - } - - unless ( $tok =~ /^\s*$/ ) { - - # try to catch some common errors - if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { - - if ( $last_nonblank_token eq 'eq' ) { - complain("Should 'eq' be '==' here ?\n"); - } - elsif ( $last_nonblank_token eq 'ne' ) { - complain("Should 'ne' be '!=' here ?\n"); - } - } - - $last_last_nonblank_token = $last_nonblank_token; - $last_last_nonblank_type = $last_nonblank_type; - $last_last_nonblank_block_type = $last_nonblank_block_type; - $last_last_nonblank_container_type = - $last_nonblank_container_type; - $last_last_nonblank_type_sequence = - $last_nonblank_type_sequence; - $last_nonblank_token = $tok; - $last_nonblank_type = $type; - $last_nonblank_prototype = $prototype; - $last_nonblank_block_type = $block_type; - $last_nonblank_container_type = $container_type; - $last_nonblank_type_sequence = $type_sequence; - $last_nonblank_i = $i_tok; - } - - # store previous token type - if ( $i_tok >= 0 ) { - $routput_token_type->[$i_tok] = $type; - $routput_block_type->[$i_tok] = $block_type; - $routput_container_type->[$i_tok] = $container_type; - $routput_type_sequence->[$i_tok] = $type_sequence; - $routput_indent_flag->[$i_tok] = $indent_flag; - } - my $pre_tok = $$rtokens[$i]; # get the next pre-token - my $pre_type = $$rtoken_type[$i]; # and type - $tok = $pre_tok; - $type = $pre_type; # to be modified as necessary - $block_type = ""; # blank for all tokens except code block braces - $container_type = ""; # blank for all tokens except some parens - $type_sequence = ""; # blank for all tokens except ?/: - $indent_flag = 0; - $prototype = ""; # blank for all tokens except user defined subs - $i_tok = $i; - - # this pre-token will start an output token - push( @{$routput_token_list}, $i_tok ); - - # continue gathering identifier if necessary - # but do not start on blanks and comments - if ( $id_scan_state && $pre_type !~ /[b#]/ ) { - - if ( $id_scan_state =~ /^(sub|package)/ ) { - scan_id(); - } - else { - scan_identifier(); - } - - last if ($id_scan_state); - next if ( ( $i > 0 ) || $type ); - - # didn't find any token; start over - $type = $pre_type; - $tok = $pre_tok; - } - - # handle whitespace tokens.. - next if ( $type eq 'b' ); - my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; - my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; - - # Build larger tokens where possible, since we are not in a quote. - # - # First try to assemble digraphs. The following tokens are - # excluded and handled specially: - # '/=' is excluded because the / might start a pattern. - # 'x=' is excluded since it might be $x=, with $ on previous line - # '**' and *= might be typeglobs of punctuation variables - # I have allowed tokens starting with <, such as <=, - # because I don't think these could be valid angle operators. - # test file: storrs4.pl - my $test_tok = $tok . $$rtokens[ $i + 1 ]; - my $combine_ok = $is_digraph{$test_tok}; - - # check for special cases which cannot be combined - if ($combine_ok) { - - # '//' must be defined_or operator if an operator is expected. - # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) - # could be migrated here for clarity - if ( $test_tok eq '//' ) { - my $next_type = $$rtokens[ $i + 1 ]; - my $expecting = - operator_expected( $prev_type, $tok, $next_type ); - $combine_ok = 0 unless ( $expecting == OPERATOR ); - } - } - - if ( - $combine_ok - && ( $test_tok ne '/=' ) # might be pattern - && ( $test_tok ne 'x=' ) # might be $x - && ( $test_tok ne '**' ) # typeglob? - && ( $test_tok ne '*=' ) # typeglob? - ) - { - $tok = $test_tok; - $i++; - - # Now try to assemble trigraphs. Note that all possible - # perl trigraphs can be constructed by appending a character - # to a digraph. - $test_tok = $tok . $$rtokens[ $i + 1 ]; - - if ( $is_trigraph{$test_tok} ) { - $tok = $test_tok; - $i++; - } - } - - $type = $tok; - $next_tok = $$rtokens[ $i + 1 ]; - $next_type = $$rtoken_type[ $i + 1 ]; - - TOKENIZER_DEBUG_FLAG_TOKENIZE && do { - local $" = ')('; - my @debug_list = ( - $last_nonblank_token, $tok, - $next_tok, $brace_depth, - $brace_type[$brace_depth], $paren_depth, - $paren_type[$paren_depth] - ); - print "TOKENIZE:(@debug_list)\n"; - }; - - # turn off attribute list on first non-blank, non-bareword - if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } - - ############################################################### - # We have the next token, $tok. - # Now we have to examine this token and decide what it is - # and define its $type - # - # section 1: bare words - ############################################################### - - if ( $pre_type eq 'w' ) { - $expecting = operator_expected( $prev_type, $tok, $next_type ); - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # ATTRS: handle sub and variable attributes - if ($in_attribute_list) { - - # treat bare word followed by open paren like qw( - if ( $next_nonblank_token eq '(' ) { - $in_quote = $quote_items{'q'}; - $allowed_quote_modifiers = $quote_modifiers{'q'}; - $type = 'q'; - $quote_type = 'q'; - next; - } - - # handle bareword not followed by open paren - else { - $type = 'w'; - next; - } - } - - # quote a word followed by => operator - if ( $next_nonblank_token eq '=' ) { - - if ( $$rtokens[ $i_next + 1 ] eq '>' ) { - if ( $is_constant{$current_package}{$tok} ) { - $type = 'C'; - } - elsif ( $is_user_function{$current_package}{$tok} ) { - $type = 'U'; - $prototype = - $user_function_prototype{$current_package}{$tok}; - } - elsif ( $tok =~ /^v\d+$/ ) { - $type = 'v'; - report_v_string($tok); - } - else { $type = 'w' } - - next; - } - } - - # quote a bare word within braces..like xxx->{s}; note that we - # must be sure this is not a structural brace, to avoid - # mistaking {s} in the following for a quoted bare word: - # for(@[){s}bla}BLA} - # Also treat q in something like var{-q} as a bare word, not qoute operator - ##if ( ( $last_nonblank_type eq 'L' ) - ## && ( $next_nonblank_token eq '}' ) ) - if ( - $next_nonblank_token eq '}' - && ( - $last_nonblank_type eq 'L' - || ( $last_nonblank_type eq 'm' - && $last_last_nonblank_type eq 'L' ) - ) - ) - { - $type = 'w'; - next; - } - - # a bare word immediately followed by :: is not a keyword; - # use $tok_kw when testing for keywords to avoid a mistake - my $tok_kw = $tok; - if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) - { - $tok_kw .= '::'; - } - - # handle operator x (now we know it isn't $x=) - if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { - if ( $tok eq 'x' ) { - - if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= - $tok = 'x='; - $type = $tok; - $i++; - } - else { - $type = 'x'; - } - } - - # FIXME: Patch: mark something like x4 as an integer for now - # It gets fixed downstream. This is easier than - # splitting the pretoken. - else { - $type = 'n'; - } - } - - elsif ( ( $tok eq 'strict' ) - and ( $last_nonblank_token eq 'use' ) ) - { - $tokenizer_self->{_saw_use_strict} = 1; - scan_bare_identifier(); - } - - elsif ( ( $tok eq 'warnings' ) - and ( $last_nonblank_token eq 'use' ) ) - { - $tokenizer_self->{_saw_perl_dash_w} = 1; - - # scan as identifier, so that we pick up something like: - # use warnings::register - scan_bare_identifier(); - } - - elsif ( - $tok eq 'AutoLoader' - && $tokenizer_self->{_look_for_autoloader} - && ( - $last_nonblank_token eq 'use' - - # these regexes are from AutoSplit.pm, which we want - # to mimic - || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ - || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ - ) - ) - { - write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); - $tokenizer_self->{_saw_autoloader} = 1; - $tokenizer_self->{_look_for_autoloader} = 0; - scan_bare_identifier(); - } - - elsif ( - $tok eq 'SelfLoader' - && $tokenizer_self->{_look_for_selfloader} - && ( $last_nonblank_token eq 'use' - || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ - || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) - ) - { - write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); - $tokenizer_self->{_saw_selfloader} = 1; - $tokenizer_self->{_look_for_selfloader} = 0; - scan_bare_identifier(); - } - - elsif ( ( $tok eq 'constant' ) - and ( $last_nonblank_token eq 'use' ) ) - { - scan_bare_identifier(); - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, - $max_token_index ); - - if ($next_nonblank_token) { - - if ( $is_keyword{$next_nonblank_token} ) { - warning( -"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" - ); - } - - # FIXME: could check for error in which next token is - # not a word (number, punctuation, ..) - else { - $is_constant{$current_package} - {$next_nonblank_token} = 1; - } - } - } - - # various quote operators - elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { - if ( $expecting == OPERATOR ) { - - # patch for paren-less for/foreach glitch, part 1 - # perl will accept this construct as valid: - # - # foreach my $key qw\Uno Due Tres Quadro\ { - # print "Set $key\n"; - # } - unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} ) - { - error_if_expecting_OPERATOR(); - } - } - $in_quote = $quote_items{$tok}; - $allowed_quote_modifiers = $quote_modifiers{$tok}; - - # All quote types are 'Q' except possibly qw quotes. - # qw quotes are special in that they may generally be trimmed - # of leading and trailing whitespace. So they are given a - # separate type, 'q', unless requested otherwise. - $type = - ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) - ? 'q' - : 'Q'; - $quote_type = $type; - } - - # check for a statement label - elsif ( - ( $next_nonblank_token eq ':' ) - && ( $$rtokens[ $i_next + 1 ] ne ':' ) - && ( $i_next <= $max_token_index ) # colon on same line - && label_ok() - ) - { - if ( $tok !~ /[A-Z]/ ) { - push @{ $tokenizer_self->{_rlower_case_labels_at} }, - $input_line_number; - } - $type = 'J'; - $tok .= ':'; - $i = $i_next; - next; - } - - # 'sub' || 'package' - elsif ( $is_sub_package{$tok_kw} ) { - error_if_expecting_OPERATOR() - if ( $expecting == OPERATOR ); - scan_id(); - } - - # Note on token types for format, __DATA__, __END__: - # It simplifies things to give these type ';', so that when we - # start rescanning we will be expecting a token of type TERM. - # We will switch to type 'k' before outputting the tokens. - elsif ( $is_format_END_DATA{$tok_kw} ) { - $type = ';'; # make tokenizer look for TERM next - $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; - last; - } - - elsif ( $is_keyword{$tok_kw} ) { - $type = 'k'; - - # Since for and foreach may not be followed immediately - # by an opening paren, we have to remember which keyword - # is associated with the next '(' - if ( $is_for_foreach{$tok} ) { - if ( new_statement_ok() ) { - $want_paren = $tok; - } - } - - # recognize 'use' statements, which are special - elsif ( $is_use_require{$tok} ) { - $statement_type = $tok; - error_if_expecting_OPERATOR() - if ( $expecting == OPERATOR ); - } - - # remember my and our to check for trailing ": shared" - elsif ( $is_my_our{$tok} ) { - $statement_type = $tok; - } - - # Check for misplaced 'elsif' and 'else', but allow isolated - # else or elsif blocks to be formatted. This is indicated - # by a last noblank token of ';' - elsif ( $tok eq 'elsif' ) { - if ( $last_nonblank_token ne ';' - && $last_nonblank_block_type !~ - /^(if|elsif|unless)$/ ) - { - warning( -"expecting '$tok' to follow one of 'if|elsif|unless'\n" - ); - } - } - elsif ( $tok eq 'else' ) { - - # patched for SWITCH/CASE - if ( $last_nonblank_token ne ';' - && $last_nonblank_block_type !~ - /^(if|elsif|unless|case|when)$/ ) - { - warning( -"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" - ); - } - } - elsif ( $tok eq 'continue' ) { - if ( $last_nonblank_token ne ';' - && $last_nonblank_block_type !~ - /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) - { - - # note: ';' '{' and '}' in list above - # because continues can follow bare blocks; - # ':' is labeled block - # - ############################################ - # NOTE: This check has been deactivated because - # continue has an alternative usage for given/when - # blocks in perl 5.10 - ## warning("'$tok' should follow a block\n"); - ############################################ - } - } - - # patch for SWITCH/CASE if 'case' and 'when are - # treated as keywords. - elsif ( $tok eq 'when' || $tok eq 'case' ) { - $statement_type = $tok; # next '{' is block - } - - # indent trailing if/unless/while/until - # outdenting will be handled by later indentation loop - if ( $tok =~ /^(if|unless|while|until)$/ - && $next_nonblank_token ne '(' ) - { - $indent_flag = 1; - } - } - - # check for inline label following - # /^(redo|last|next|goto)$/ - elsif (( $last_nonblank_type eq 'k' ) - && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) - { - $type = 'j'; - next; - } - - # something else -- - else { - - scan_bare_identifier(); - if ( $type eq 'w' ) { - - if ( $expecting == OPERATOR ) { - - # don't complain about possible indirect object - # notation. - # For example: - # package main; - # sub new($) { ... } - # $b = new A::; # calls A::new - # $c = new A; # same thing but suspicious - # This will call A::new but we have a 'new' in - # main:: which looks like a constant. - # - if ( $last_nonblank_type eq 'C' ) { - if ( $tok !~ /::$/ ) { - complain(<{$tok}; - if ($code) { - $expecting = - operator_expected( $prev_type, $tok, $next_type ); - $code->(); - redo if $in_quote; - } - } - } - - # ----------------------------- - # end of main tokenization loop - # ----------------------------- - - if ( $i_tok >= 0 ) { - $routput_token_type->[$i_tok] = $type; - $routput_block_type->[$i_tok] = $block_type; - $routput_container_type->[$i_tok] = $container_type; - $routput_type_sequence->[$i_tok] = $type_sequence; - $routput_indent_flag->[$i_tok] = $indent_flag; - } - - unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { - $last_last_nonblank_token = $last_nonblank_token; - $last_last_nonblank_type = $last_nonblank_type; - $last_last_nonblank_block_type = $last_nonblank_block_type; - $last_last_nonblank_container_type = $last_nonblank_container_type; - $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; - $last_nonblank_token = $tok; - $last_nonblank_type = $type; - $last_nonblank_block_type = $block_type; - $last_nonblank_container_type = $container_type; - $last_nonblank_type_sequence = $type_sequence; - $last_nonblank_prototype = $prototype; - } - - # reset indentation level if necessary at a sub or package - # in an attempt to recover from a nesting error - if ( $level_in_tokenizer < 0 ) { - if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { - reset_indentation_level(0); - brace_warning("resetting level to 0 at $1 $2\n"); - } - } - - # all done tokenizing this line ... - # now prepare the final list of tokens and types - - my @token_type = (); # stack of output token types - my @block_type = (); # stack of output code block types - my @container_type = (); # stack of output code container types - my @type_sequence = (); # stack of output type sequence numbers - my @tokens = (); # output tokens - my @levels = (); # structural brace levels of output tokens - my @slevels = (); # secondary nesting levels of output tokens - my @nesting_tokens = (); # string of tokens leading to this depth - my @nesting_types = (); # string of token types leading to this depth - my @nesting_blocks = (); # string of block types leading to this depth - my @nesting_lists = (); # string of list types leading to this depth - my @ci_string = (); # string needed to compute continuation indentation - my @container_environment = (); # BLOCK or LIST - my $container_environment = ''; - my $im = -1; # previous $i value - my $num; - my $ci_string_sum = ones_count($ci_string_in_tokenizer); - -# Computing Token Indentation -# -# The final section of the tokenizer forms tokens and also computes -# parameters needed to find indentation. It is much easier to do it -# in the tokenizer than elsewhere. Here is a brief description of how -# indentation is computed. Perl::Tidy computes indentation as the sum -# of 2 terms: -# -# (1) structural indentation, such as if/else/elsif blocks -# (2) continuation indentation, such as long parameter call lists. -# -# These are occasionally called primary and secondary indentation. -# -# Structural indentation is introduced by tokens of type '{', although -# the actual tokens might be '{', '(', or '['. Structural indentation -# is of two types: BLOCK and non-BLOCK. Default structural indentation -# is 4 characters if the standard indentation scheme is used. -# -# Continuation indentation is introduced whenever a line at BLOCK level -# is broken before its termination. Default continuation indentation -# is 2 characters in the standard indentation scheme. -# -# Both types of indentation may be nested arbitrarily deep and -# interlaced. The distinction between the two is somewhat arbitrary. -# -# For each token, we will define two variables which would apply if -# the current statement were broken just before that token, so that -# that token started a new line: -# -# $level = the structural indentation level, -# $ci_level = the continuation indentation level -# -# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), -# assuming defaults. However, in some special cases it is customary -# to modify $ci_level from this strict value. -# -# The total structural indentation is easy to compute by adding and -# subtracting 1 from a saved value as types '{' and '}' are seen. The -# running value of this variable is $level_in_tokenizer. -# -# The total continuation is much more difficult to compute, and requires -# several variables. These veriables are: -# -# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for -# each indentation level, if there are intervening open secondary -# structures just prior to that level. -# $continuation_string_in_tokenizer = a string of 1's and 0's indicating -# if the last token at that level is "continued", meaning that it -# is not the first token of an expression. -# $nesting_block_string = a string of 1's and 0's indicating, for each -# indentation level, if the level is of type BLOCK or not. -# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string -# $nesting_list_string = a string of 1's and 0's indicating, for each -# indentation level, if it is is appropriate for list formatting. -# If so, continuation indentation is used to indent long list items. -# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string -# @{$rslevel_stack} = a stack of total nesting depths at each -# structural indentation level, where "total nesting depth" means -# the nesting depth that would occur if every nesting token -- '{', '[', -# and '(' -- , regardless of context, is used to compute a nesting -# depth. - - #my $nesting_block_flag = ($nesting_block_string =~ /1$/); - #my $nesting_list_flag = ($nesting_list_string =~ /1$/); - - my ( $ci_string_i, $level_i, $nesting_block_string_i, - $nesting_list_string_i, $nesting_token_string_i, - $nesting_type_string_i, ); - - foreach $i ( @{$routput_token_list} ) - { # scan the list of pre-tokens indexes - - # self-checking for valid token types - my $type = $routput_token_type->[$i]; - my $forced_indentation_flag = $routput_indent_flag->[$i]; - - # See if we should undo the $forced_indentation_flag. - # Forced indentation after 'if', 'unless', 'while' and 'until' - # expressions without trailing parens is optional and doesn't - # always look good. It is usually okay for a trailing logical - # expression, but if the expression is a function call, code block, - # or some kind of list it puts in an unwanted extra indentation - # level which is hard to remove. - # - # Example where extra indentation looks ok: - # return 1 - # if $det_a < 0 and $det_b > 0 - # or $det_a > 0 and $det_b < 0; - # - # Example where extra indentation is not needed because - # the eval brace also provides indentation: - # print "not " if defined eval { - # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; - # }; - # - # The following rule works fairly well: - # Undo the flag if the end of this line, or start of the next - # line, is an opening container token or a comma. - # This almost always works, but if not after another pass it will - # be stable. - if ( $forced_indentation_flag && $type eq 'k' ) { - my $ixlast = -1; - my $ilast = $routput_token_list->[$ixlast]; - my $toklast = $routput_token_type->[$ilast]; - if ( $toklast eq '#' ) { - $ixlast--; - $ilast = $routput_token_list->[$ixlast]; - $toklast = $routput_token_type->[$ilast]; - } - if ( $toklast eq 'b' ) { - $ixlast--; - $ilast = $routput_token_list->[$ixlast]; - $toklast = $routput_token_type->[$ilast]; - } - if ( $toklast =~ /^[\{,]$/ ) { - $forced_indentation_flag = 0; - } - else { - ( $toklast, my $i_next ) = - find_next_nonblank_token( $max_token_index, $rtokens, - $max_token_index ); - if ( $toklast =~ /^[\{,]$/ ) { - $forced_indentation_flag = 0; - } - } - } - - # if we are already in an indented if, see if we should outdent - if ($indented_if_level) { - - # don't try to nest trailing if's - shouldn't happen - if ( $type eq 'k' ) { - $forced_indentation_flag = 0; - } - - # check for the normal case - outdenting at next ';' - elsif ( $type eq ';' ) { - if ( $level_in_tokenizer == $indented_if_level ) { - $forced_indentation_flag = -1; - $indented_if_level = 0; - } - } - - # handle case of missing semicolon - elsif ( $type eq '}' ) { - if ( $level_in_tokenizer == $indented_if_level ) { - $indented_if_level = 0; - - # TBD: This could be a subroutine call - $level_in_tokenizer--; - if ( @{$rslevel_stack} > 1 ) { - pop( @{$rslevel_stack} ); - } - if ( length($nesting_block_string) > 1 ) - { # true for valid script - chop $nesting_block_string; - chop $nesting_list_string; - } - - } - } - } - - my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken - $level_i = $level_in_tokenizer; - - # This can happen by running perltidy on non-scripts - # although it could also be bug introduced by programming change. - # Perl silently accepts a 032 (^Z) and takes it as the end - if ( !$is_valid_token_type{$type} ) { - my $val = ord($type); - warning( - "unexpected character decimal $val ($type) in script\n"); - $tokenizer_self->{_in_error} = 1; - } - - # ---------------------------------------------------------------- - # TOKEN TYPE PATCHES - # output __END__, __DATA__, and format as type 'k' instead of ';' - # to make html colors correct, etc. - my $fix_type = $type; - if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } - - # output anonymous 'sub' as keyword - if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } - - # ----------------------------------------------------------------- - - $nesting_token_string_i = $nesting_token_string; - $nesting_type_string_i = $nesting_type_string; - $nesting_block_string_i = $nesting_block_string; - $nesting_list_string_i = $nesting_list_string; - - # set primary indentation levels based on structural braces - # Note: these are set so that the leading braces have a HIGHER - # level than their CONTENTS, which is convenient for indentation - # Also, define continuation indentation for each token. - if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) - { - - # use environment before updating - $container_environment = - $nesting_block_flag ? 'BLOCK' - : $nesting_list_flag ? 'LIST' - : ""; - - # if the difference between total nesting levels is not 1, - # there are intervening non-structural nesting types between - # this '{' and the previous unclosed '{' - my $intervening_secondary_structure = 0; - if ( @{$rslevel_stack} ) { - $intervening_secondary_structure = - $slevel_in_tokenizer - $rslevel_stack->[-1]; - } - - # Continuation Indentation - # - # Having tried setting continuation indentation both in the formatter and - # in the tokenizer, I can say that setting it in the tokenizer is much, - # much easier. The formatter already has too much to do, and can't - # make decisions on line breaks without knowing what 'ci' will be at - # arbitrary locations. - # - # But a problem with setting the continuation indentation (ci) here - # in the tokenizer is that we do not know where line breaks will actually - # be. As a result, we don't know if we should propagate continuation - # indentation to higher levels of structure. - # - # For nesting of only structural indentation, we never need to do this. - # For example, in a long if statement, like this - # - # if ( !$output_block_type[$i] - # && ($in_statement_continuation) ) - # { <--outdented - # do_something(); - # } - # - # the second line has ci but we do normally give the lines within the BLOCK - # any ci. This would be true if we had blocks nested arbitrarily deeply. - # - # But consider something like this, where we have created a break after - # an opening paren on line 1, and the paren is not (currently) a - # structural indentation token: - # - # my $file = $menubar->Menubutton( - # qw/-text File -underline 0 -menuitems/ => [ - # [ - # Cascade => '~View', - # -menuitems => [ - # ... - # - # The second line has ci, so it would seem reasonable to propagate it - # down, giving the third line 1 ci + 1 indentation. This suggests the - # following rule, which is currently used to propagating ci down: if there - # are any non-structural opening parens (or brackets, or braces), before - # an opening structural brace, then ci is propagated down, and otherwise - # not. The variable $intervening_secondary_structure contains this - # information for the current token, and the string - # "$ci_string_in_tokenizer" is a stack of previous values of this - # variable. - - # save the current states - push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); - $level_in_tokenizer++; - - if ($forced_indentation_flag) { - - # break BEFORE '?' when there is forced indentation - if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } - if ( $type eq 'k' ) { - $indented_if_level = $level_in_tokenizer; - } - } - - if ( $routput_block_type->[$i] ) { - $nesting_block_flag = 1; - $nesting_block_string .= '1'; - } - else { - $nesting_block_flag = 0; - $nesting_block_string .= '0'; - } - - # we will use continuation indentation within containers - # which are not blocks and not logical expressions - my $bit = 0; - if ( !$routput_block_type->[$i] ) { - - # propagate flag down at nested open parens - if ( $routput_container_type->[$i] eq '(' ) { - $bit = 1 if $nesting_list_flag; - } - - # use list continuation if not a logical grouping - # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ - else { - $bit = 1 - unless - $is_logical_container{ $routput_container_type->[$i] - }; - } - } - $nesting_list_string .= $bit; - $nesting_list_flag = $bit; - - $ci_string_in_tokenizer .= - ( $intervening_secondary_structure != 0 ) ? '1' : '0'; - $ci_string_sum = ones_count($ci_string_in_tokenizer); - $continuation_string_in_tokenizer .= - ( $in_statement_continuation > 0 ) ? '1' : '0'; - - # Sometimes we want to give an opening brace continuation indentation, - # and sometimes not. For code blocks, we don't do it, so that the leading - # '{' gets outdented, like this: - # - # if ( !$output_block_type[$i] - # && ($in_statement_continuation) ) - # { <--outdented - # - # For other types, we will give them continuation indentation. For example, - # here is how a list looks with the opening paren indented: - # - # @LoL = - # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], - # [ "homer", "marge", "bart" ], ); - # - # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) - - my $total_ci = $ci_string_sum; - if ( - !$routput_block_type->[$i] # patch: skip for BLOCK - && ($in_statement_continuation) - && !( $forced_indentation_flag && $type eq ':' ) - ) - { - $total_ci += $in_statement_continuation - unless ( $ci_string_in_tokenizer =~ /1$/ ); - } - - $ci_string_i = $total_ci; - $in_statement_continuation = 0; - } - - elsif ($type eq '}' - || $type eq 'R' - || $forced_indentation_flag < 0 ) - { - - # only a nesting error in the script would prevent popping here - if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } - - $level_i = --$level_in_tokenizer; - - # restore previous level values - if ( length($nesting_block_string) > 1 ) - { # true for valid script - chop $nesting_block_string; - $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); - chop $nesting_list_string; - $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); - - chop $ci_string_in_tokenizer; - $ci_string_sum = ones_count($ci_string_in_tokenizer); - - $in_statement_continuation = - chop $continuation_string_in_tokenizer; - - # zero continuation flag at terminal BLOCK '}' which - # ends a statement. - if ( $routput_block_type->[$i] ) { - - # ...These include non-anonymous subs - # note: could be sub ::abc { or sub 'abc - if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { - - # note: older versions of perl require the /gc modifier - # here or else the \G does not work. - if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) - { - $in_statement_continuation = 0; - } - } - -# ...and include all block types except user subs with -# block prototypes and these: (sort|grep|map|do|eval) -# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ - elsif ( - $is_zero_continuation_block_type{ - $routput_block_type->[$i] } ) - { - $in_statement_continuation = 0; - } - - # ..but these are not terminal types: - # /^(sort|grep|map|do|eval)$/ ) - elsif ( - $is_not_zero_continuation_block_type{ - $routput_block_type->[$i] } ) - { - } - - # ..and a block introduced by a label - # /^\w+\s*:$/gc ) { - elsif ( $routput_block_type->[$i] =~ /:$/ ) { - $in_statement_continuation = 0; - } - - # user function with block prototype - else { - $in_statement_continuation = 0; - } - } - - # If we are in a list, then - # we must set continuatoin indentation at the closing - # paren of something like this (paren after $check): - # assert( - # __LINE__, - # ( not defined $check ) - # or ref $check - # or $check eq "new" - # or $check eq "old", - # ); - elsif ( $tok eq ')' ) { - $in_statement_continuation = 1 - if $routput_container_type->[$i] =~ /^[;,\{\}]$/; - } - - elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } - } - - # use environment after updating - $container_environment = - $nesting_block_flag ? 'BLOCK' - : $nesting_list_flag ? 'LIST' - : ""; - $ci_string_i = $ci_string_sum + $in_statement_continuation; - $nesting_block_string_i = $nesting_block_string; - $nesting_list_string_i = $nesting_list_string; - } - - # not a structural indentation type.. - else { - - $container_environment = - $nesting_block_flag ? 'BLOCK' - : $nesting_list_flag ? 'LIST' - : ""; - - # zero the continuation indentation at certain tokens so - # that they will be at the same level as its container. For - # commas, this simplifies the -lp indentation logic, which - # counts commas. For ?: it makes them stand out. - if ($nesting_list_flag) { - if ( $type =~ /^[,\?\:]$/ ) { - $in_statement_continuation = 0; - } - } - - # be sure binary operators get continuation indentation - if ( - $container_environment - && ( $type eq 'k' && $is_binary_keyword{$tok} - || $is_binary_type{$type} ) - ) - { - $in_statement_continuation = 1; - } - - # continuation indentation is sum of any open ci from previous - # levels plus the current level - $ci_string_i = $ci_string_sum + $in_statement_continuation; - - # update continuation flag ... - # if this isn't a blank or comment.. - if ( $type ne 'b' && $type ne '#' ) { - - # and we are in a BLOCK - if ($nesting_block_flag) { - - # the next token after a ';' and label starts a new stmt - if ( $type eq ';' || $type eq 'J' ) { - $in_statement_continuation = 0; - } - - # otherwise, we are continuing the current statement - else { - $in_statement_continuation = 1; - } - } - - # if we are not in a BLOCK.. - else { - - # do not use continuation indentation if not list - # environment (could be within if/elsif clause) - if ( !$nesting_list_flag ) { - $in_statement_continuation = 0; - } - - # otherwise, the next token after a ',' starts a new term - elsif ( $type eq ',' ) { - $in_statement_continuation = 0; - } - - # otherwise, we are continuing the current term - else { - $in_statement_continuation = 1; - } - } - } - } - - if ( $level_in_tokenizer < 0 ) { - unless ( $tokenizer_self->{_saw_negative_indentation} ) { - $tokenizer_self->{_saw_negative_indentation} = 1; - warning("Starting negative indentation\n"); - } - } - - # set secondary nesting levels based on all continment token types - # Note: these are set so that the nesting depth is the depth - # of the PREVIOUS TOKEN, which is convenient for setting - # the stength of token bonds - my $slevel_i = $slevel_in_tokenizer; - - # /^[L\{\(\[]$/ - if ( $is_opening_type{$type} ) { - $slevel_in_tokenizer++; - $nesting_token_string .= $tok; - $nesting_type_string .= $type; - } - - # /^[R\}\)\]]$/ - elsif ( $is_closing_type{$type} ) { - $slevel_in_tokenizer--; - my $char = chop $nesting_token_string; - - if ( $char ne $matching_start_token{$tok} ) { - $nesting_token_string .= $char . $tok; - $nesting_type_string .= $type; - } - else { - chop $nesting_type_string; - } - } - - push( @block_type, $routput_block_type->[$i] ); - push( @ci_string, $ci_string_i ); - push( @container_environment, $container_environment ); - push( @container_type, $routput_container_type->[$i] ); - push( @levels, $level_i ); - push( @nesting_tokens, $nesting_token_string_i ); - push( @nesting_types, $nesting_type_string_i ); - push( @slevels, $slevel_i ); - push( @token_type, $fix_type ); - push( @type_sequence, $routput_type_sequence->[$i] ); - push( @nesting_blocks, $nesting_block_string ); - push( @nesting_lists, $nesting_list_string ); - - # now form the previous token - if ( $im >= 0 ) { - $num = - $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters - - if ( $num > 0 ) { - push( @tokens, - substr( $input_line, $$rtoken_map[$im], $num ) ); - } - } - $im = $i; - } - - $num = length($input_line) - $$rtoken_map[$im]; # make the last token - if ( $num > 0 ) { - push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); - } - - $tokenizer_self->{_in_attribute_list} = $in_attribute_list; - $tokenizer_self->{_in_quote} = $in_quote; - $tokenizer_self->{_quote_target} = - $in_quote ? matching_end_token($quote_character) : ""; - $tokenizer_self->{_rhere_target_list} = $rhere_target_list; - - $line_of_tokens->{_rtoken_type} = \@token_type; - $line_of_tokens->{_rtokens} = \@tokens; - $line_of_tokens->{_rblock_type} = \@block_type; - $line_of_tokens->{_rcontainer_type} = \@container_type; - $line_of_tokens->{_rcontainer_environment} = \@container_environment; - $line_of_tokens->{_rtype_sequence} = \@type_sequence; - $line_of_tokens->{_rlevels} = \@levels; - $line_of_tokens->{_rslevels} = \@slevels; - $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; - $line_of_tokens->{_rci_levels} = \@ci_string; - $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; - - return; - } -} # end tokenize_this_line - -#########i############################################################# -# Tokenizer routines which assist in identifying token types -####################################################################### - -sub operator_expected { - - # Many perl symbols have two or more meanings. For example, '<<' - # can be a shift operator or a here-doc operator. The - # interpretation of these symbols depends on the current state of - # the tokenizer, which may either be expecting a term or an - # operator. For this example, a << would be a shift if an operator - # is expected, and a here-doc if a term is expected. This routine - # is called to make this decision for any current token. It returns - # one of three possible values: - # - # OPERATOR - operator expected (or at least, not a term) - # UNKNOWN - can't tell - # TERM - a term is expected (or at least, not an operator) - # - # The decision is based on what has been seen so far. This - # information is stored in the "$last_nonblank_type" and - # "$last_nonblank_token" variables. For example, if the - # $last_nonblank_type is '=~', then we are expecting a TERM, whereas - # if $last_nonblank_type is 'n' (numeric), we are expecting an - # OPERATOR. - # - # If a UNKNOWN is returned, the calling routine must guess. A major - # goal of this tokenizer is to minimize the possiblity of returning - # UNKNOWN, because a wrong guess can spoil the formatting of a - # script. - # - # adding NEW_TOKENS: it is critically important that this routine be - # updated to allow it to determine if an operator or term is to be - # expected after the new token. Doing this simply involves adding - # the new token character to one of the regexes in this routine or - # to one of the hash lists - # that it uses, which are initialized in the BEGIN section. - # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, - # $statement_type - - my ( $prev_type, $tok, $next_type ) = @_; - - my $op_expected = UNKNOWN; - -#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; - -# Note: function prototype is available for token type 'U' for future -# program development. It contains the leading and trailing parens, -# and no blanks. It might be used to eliminate token type 'C', for -# example (prototype = '()'). Thus: -# if ($last_nonblank_type eq 'U') { -# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; -# } - - # A possible filehandle (or object) requires some care... - if ( $last_nonblank_type eq 'Z' ) { - - # angle.t - if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { - $op_expected = UNKNOWN; - } - - # For possible file handle like "$a", Perl uses weird parsing rules. - # For example: - # print $a/2,"/hi"; - division - # print $a / 2,"/hi"; - division - # print $a/ 2,"/hi"; - division - # print $a /2,"/hi"; - pattern (and error)! - elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { - $op_expected = TERM; - } - - # Note when an operation is being done where a - # filehandle might be expected, since a change in whitespace - # could change the interpretation of the statement. - else { - if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { - complain("operator in print statement not recommended\n"); - $op_expected = OPERATOR; - } - } - } - - # handle something after 'do' and 'eval' - elsif ( $is_block_operator{$last_nonblank_token} ) { - - # something like $a = eval "expression"; - # ^ - if ( $last_nonblank_type eq 'k' ) { - $op_expected = TERM; # expression or list mode following keyword - } - - # something like $a = do { BLOCK } / 2; - # ^ - else { - $op_expected = OPERATOR; # block mode following } - } - } - - # handle bare word.. - elsif ( $last_nonblank_type eq 'w' ) { - - # unfortunately, we can't tell what type of token to expect next - # after most bare words - $op_expected = UNKNOWN; - } - - # operator, but not term possible after these types - # Note: moved ')' from type to token because parens in list context - # get marked as '{' '}' now. This is a minor glitch in the following: - # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); - # - elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) - || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) - { - $op_expected = OPERATOR; - - # in a 'use' statement, numbers and v-strings are not true - # numbers, so to avoid incorrect error messages, we will - # mark them as unknown for now (use.t) - # TODO: it would be much nicer to create a new token V for VERSION - # number in a use statement. Then this could be a check on type V - # and related patches which change $statement_type for '=>' - # and ',' could be removed. Further, it would clean things up to - # scan the 'use' statement with a separate subroutine. - if ( ( $statement_type eq 'use' ) - && ( $last_nonblank_type =~ /^[nv]$/ ) ) - { - $op_expected = UNKNOWN; - } - } - - # no operator after many keywords, such as "die", "warn", etc - elsif ( $expecting_term_token{$last_nonblank_token} ) { - - # patch for dor.t (defined or). - # perl functions which may be unary operators - # TODO: This list is incomplete, and these should be put - # into a hash. - if ( $tok eq '/' - && $next_type eq '/' - && $last_nonblank_type eq 'k' - && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) - { - $op_expected = OPERATOR; - } - else { - $op_expected = TERM; - } - } - - # no operator after things like + - ** (i.e., other operators) - elsif ( $expecting_term_types{$last_nonblank_type} ) { - $op_expected = TERM; - } - - # a few operators, like "time", have an empty prototype () and so - # take no parameters but produce a value to operate on - elsif ( $expecting_operator_token{$last_nonblank_token} ) { - $op_expected = OPERATOR; - } - - # post-increment and decrement produce values to be operated on - elsif ( $expecting_operator_types{$last_nonblank_type} ) { - $op_expected = OPERATOR; - } - - # no value to operate on after sub block - elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } - - # a right brace here indicates the end of a simple block. - # all non-structural right braces have type 'R' - # all braces associated with block operator keywords have been given those - # keywords as "last_nonblank_token" and caught above. - # (This statement is order dependent, and must come after checking - # $last_nonblank_token). - elsif ( $last_nonblank_type eq '}' ) { - - # patch for dor.t (defined or). - if ( $tok eq '/' - && $next_type eq '/' - && $last_nonblank_token eq ']' ) - { - $op_expected = OPERATOR; - } - else { - $op_expected = TERM; - } - } - - # something else..what did I forget? - else { - - # collecting diagnostics on unknown operator types..see what was missed - $op_expected = UNKNOWN; - write_diagnostics( -"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" - ); - } - - TOKENIZER_DEBUG_FLAG_EXPECT && do { - print -"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; - }; - return $op_expected; -} - -sub new_statement_ok { - - # return true if the current token can start a new statement - # USES GLOBAL VARIABLES: $last_nonblank_type - - return label_ok() # a label would be ok here - - || $last_nonblank_type eq 'J'; # or we follow a label - -} - -sub label_ok { - - # Decide if a bare word followed by a colon here is a label - # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, - # $brace_depth, @brace_type - - # if it follows an opening or closing code block curly brace.. - if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) - && $last_nonblank_type eq $last_nonblank_token ) - { - - # it is a label if and only if the curly encloses a code block - return $brace_type[$brace_depth]; - } - - # otherwise, it is a label if and only if it follows a ';' - # (real or fake) - else { - return ( $last_nonblank_type eq ';' ); - } -} - -sub code_block_type { - - # Decide if this is a block of code, and its type. - # Must be called only when $type = $token = '{' - # The problem is to distinguish between the start of a block of code - # and the start of an anonymous hash reference - # Returns "" if not code block, otherwise returns 'last_nonblank_token' - # to indicate the type of code block. (For example, 'last_nonblank_token' - # might be 'if' for an if block, 'else' for an else block, etc). - # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, - # $last_nonblank_block_type, $brace_depth, @brace_type - - # handle case of multiple '{'s - -# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; - - my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; - if ( $last_nonblank_token eq '{' - && $last_nonblank_type eq $last_nonblank_token ) - { - - # opening brace where a statement may appear is probably - # a code block but might be and anonymous hash reference - if ( $brace_type[$brace_depth] ) { - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # cannot start a code block within an anonymous hash - else { - return ""; - } - } - - elsif ( $last_nonblank_token eq ';' ) { - - # an opening brace where a statement may appear is probably - # a code block but might be and anonymous hash reference - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # handle case of '}{' - elsif ($last_nonblank_token eq '}' - && $last_nonblank_type eq $last_nonblank_token ) - { - - # a } { situation ... - # could be hash reference after code block..(blktype1.t) - if ($last_nonblank_block_type) { - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # must be a block if it follows a closing hash reference - else { - return $last_nonblank_token; - } - } - - # NOTE: braces after type characters start code blocks, but for - # simplicity these are not identified as such. See also - # sub is_non_structural_brace. - # elsif ( $last_nonblank_type eq 't' ) { - # return $last_nonblank_token; - # } - - # brace after label: - elsif ( $last_nonblank_type eq 'J' ) { - return $last_nonblank_token; - } - -# otherwise, look at previous token. This must be a code block if -# it follows any of these: -# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ - elsif ( $is_code_block_token{$last_nonblank_token} ) { - return $last_nonblank_token; - } - - # or a sub definition - elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) - && $last_nonblank_token =~ /^sub\b/ ) - { - return $last_nonblank_token; - } - - # user-defined subs with block parameters (like grep/map/eval) - elsif ( $last_nonblank_type eq 'G' ) { - return $last_nonblank_token; - } - - # check bareword - elsif ( $last_nonblank_type eq 'w' ) { - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # anything else must be anonymous hash reference - else { - return ""; - } -} - -sub decide_if_code_block { - - # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # we are at a '{' where a statement may appear. - # We must decide if this brace starts an anonymous hash or a code - # block. - # return "" if anonymous hash, and $last_nonblank_token otherwise - - # initialize to be code BLOCK - my $code_block_type = $last_nonblank_token; - - # Check for the common case of an empty anonymous hash reference: - # Maybe something like sub { { } } - if ( $next_nonblank_token eq '}' ) { - $code_block_type = ""; - } - - else { - - # To guess if this '{' is an anonymous hash reference, look ahead - # and test as follows: - # - # it is a hash reference if next come: - # - a string or digit followed by a comma or => - # - bareword followed by => - # otherwise it is a code block - # - # Examples of anonymous hash ref: - # {'aa',}; - # {1,2} - # - # Examples of code blocks: - # {1; print "hello\n", 1;} - # {$a,1}; - - # We are only going to look ahead one more (nonblank/comment) line. - # Strange formatting could cause a bad guess, but that's unlikely. - my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; - my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; - my ( $rpre_tokens, $rpre_types ) = - peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but - # generous, and prevents - # wasting lots of - # time in mangled files - if ( defined($rpre_types) && @$rpre_types ) { - push @pre_types, @$rpre_types; - push @pre_tokens, @$rpre_tokens; - } - - # put a sentinal token to simplify stopping the search - push @pre_types, '}'; - - my $jbeg = 0; - $jbeg = 1 if $pre_types[0] eq 'b'; - - # first look for one of these - # - bareword - # - bareword with leading - - # - digit - # - quoted string - my $j = $jbeg; - if ( $pre_types[$j] =~ /^[\'\"]/ ) { - - # find the closing quote; don't worry about escapes - my $quote_mark = $pre_types[$j]; - for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) { - if ( $pre_types[$k] eq $quote_mark ) { - $j = $k + 1; - my $next = $pre_types[$j]; - last; - } - } - } - elsif ( $pre_types[$j] eq 'd' ) { - $j++; - } - elsif ( $pre_types[$j] eq 'w' ) { - unless ( $is_keyword{ $pre_tokens[$j] } ) { - $j++; - } - } - elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { - $j++; - } - if ( $j > $jbeg ) { - - $j++ if $pre_types[$j] eq 'b'; - - # it's a hash ref if a comma or => follow next - if ( $pre_types[$j] eq ',' - || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) - { - $code_block_type = ""; - } - } - } - - return $code_block_type; -} - -sub unexpected { - - # report unexpected token type and show where it is - # USES GLOBAL VARIABLES: $tokenizer_self - my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, - $rpretoken_type, $input_line ) - = @_; - - if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { - my $msg = "found $found where $expecting expected"; - my $pos = $$rpretoken_map[$i_tok]; - interrupt_logfile(); - my $input_line_number = $tokenizer_self->{_last_line_number}; - my ( $offset, $numbered_line, $underline ) = - make_numbered_line( $input_line_number, $input_line, $pos ); - $underline = write_on_underline( $underline, $pos - $offset, '^' ); - - my $trailer = ""; - if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { - my $pos_prev = $$rpretoken_map[$last_nonblank_i]; - my $num; - if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { - $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; - } - else { - $num = $pos - $pos_prev; - } - if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } - - $underline = - write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); - $trailer = " (previous token underlined)"; - } - warning( $numbered_line . "\n" ); - warning( $underline . "\n" ); - warning( $msg . $trailer . "\n" ); - resume_logfile(); - } -} - -sub is_non_structural_brace { - - # Decide if a brace or bracket is structural or non-structural - # by looking at the previous token and type - # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token - - # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. - # Tentatively deactivated because it caused the wrong operator expectation - # for this code: - # $user = @vars[1] / 100; - # Must update sub operator_expected before re-implementing. - # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { - # return 0; - # } - - # NOTE: braces after type characters start code blocks, but for - # simplicity these are not identified as such. See also - # sub code_block_type - # if ($last_nonblank_type eq 't') {return 0} - - # otherwise, it is non-structural if it is decorated - # by type information. - # For example, the '{' here is non-structural: ${xxx} - ( - $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ - - # or if we follow a hash or array closing curly brace or bracket - # For example, the second '{' in this is non-structural: $a{'x'}{'y'} - # because the first '}' would have been given type 'R' - || $last_nonblank_type =~ /^([R\]])$/ - ); -} - -#########i############################################################# -# Tokenizer routines for tracking container nesting depths -####################################################################### - -# The following routines keep track of nesting depths of the nesting -# types, ( [ { and ?. This is necessary for determining the indentation -# level, and also for debugging programs. Not only do they keep track of -# nesting depths of the individual brace types, but they check that each -# of the other brace types is balanced within matching pairs. For -# example, if the program sees this sequence: -# -# { ( ( ) } -# -# then it can determine that there is an extra left paren somewhere -# between the { and the }. And so on with every other possible -# combination of outer and inner brace types. For another -# example: -# -# ( [ ..... ] ] ) -# -# which has an extra ] within the parens. -# -# The brace types have indexes 0 .. 3 which are indexes into -# the matrices. -# -# The pair ? : are treated as just another nesting type, with ? acting -# as the opening brace and : acting as the closing brace. -# -# The matrix -# -# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; -# -# saves the nesting depth of brace type $b (where $b is either of the other -# nesting types) when brace type $a enters a new depth. When this depth -# decreases, a check is made that the current depth of brace types $b is -# unchanged, or otherwise there must have been an error. This can -# be very useful for localizing errors, particularly when perl runs to -# the end of a large file (such as this one) and announces that there -# is a problem somewhere. -# -# A numerical sequence number is maintained for every nesting type, -# so that each matching pair can be uniquely identified in a simple -# way. - -sub increase_nesting_depth { - my ( $aa, $pos ) = @_; - - # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, - # @current_sequence_number, @depth_array, @starting_line_of_current_depth - my $bb; - $current_depth[$aa]++; - $total_depth++; - $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; - my $input_line_number = $tokenizer_self->{_last_line_number}; - my $input_line = $tokenizer_self->{_line_text}; - - # Sequence numbers increment by number of items. This keeps - # a unique set of numbers but still allows the relative location - # of any type to be determined. - $nesting_sequence_number[$aa] += scalar(@closing_brace_names); - my $seqno = $nesting_sequence_number[$aa]; - $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; - - $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = - [ $input_line_number, $input_line, $pos ]; - - for $bb ( 0 .. $#closing_brace_names ) { - next if ( $bb == $aa ); - $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; - } - - # set a flag for indenting a nested ternary statement - my $indent = 0; - if ( $aa == QUESTION_COLON ) { - $nested_ternary_flag[ $current_depth[$aa] ] = 0; - if ( $current_depth[$aa] > 1 ) { - if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { - my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; - if ( $pdepth == $total_depth - 1 ) { - $indent = 1; - $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; - } - } - } - } - return ( $seqno, $indent ); -} - -sub decrease_nesting_depth { - - my ( $aa, $pos ) = @_; - - # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, - # @current_sequence_number, @depth_array, @starting_line_of_current_depth - my $bb; - my $seqno = 0; - my $input_line_number = $tokenizer_self->{_last_line_number}; - my $input_line = $tokenizer_self->{_line_text}; - - my $outdent = 0; - $total_depth--; - if ( $current_depth[$aa] > 0 ) { - - # set a flag for un-indenting after seeing a nested ternary statement - $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; - if ( $aa == QUESTION_COLON ) { - $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; - } - - # check that any brace types $bb contained within are balanced - for $bb ( 0 .. $#closing_brace_names ) { - next if ( $bb == $aa ); - - unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == - $current_depth[$bb] ) - { - my $diff = - $current_depth[$bb] - - $depth_array[$aa][$bb][ $current_depth[$aa] ]; - - # don't whine too many times - my $saw_brace_error = get_saw_brace_error(); - if ( - $saw_brace_error <= MAX_NAG_MESSAGES - - # if too many closing types have occured, we probably - # already caught this error - && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) - ) - { - interrupt_logfile(); - my $rsl = - $starting_line_of_current_depth[$aa] - [ $current_depth[$aa] ]; - my $sl = $$rsl[0]; - my $rel = [ $input_line_number, $input_line, $pos ]; - my $el = $$rel[0]; - my ($ess); - - if ( $diff == 1 || $diff == -1 ) { - $ess = ''; - } - else { - $ess = 's'; - } - my $bname = - ( $diff > 0 ) - ? $opening_brace_names[$bb] - : $closing_brace_names[$bb]; - write_error_indicator_pair( @$rsl, '^' ); - my $msg = <<"EOM"; -Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el -EOM - - if ( $diff > 0 ) { - my $rml = - $starting_line_of_current_depth[$bb] - [ $current_depth[$bb] ]; - my $ml = $$rml[0]; - $msg .= -" The most recent un-matched $bname is on line $ml\n"; - write_error_indicator_pair( @$rml, '^' ); - } - write_error_indicator_pair( @$rel, '^' ); - warning($msg); - resume_logfile(); - } - increment_brace_error(); - } - } - $current_depth[$aa]--; - } - else { - - my $saw_brace_error = get_saw_brace_error(); - if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { - my $msg = <<"EOM"; -There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number -EOM - indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); - } - increment_brace_error(); - } - return ( $seqno, $outdent ); -} - -sub check_final_nesting_depths { - my ($aa); - - # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth - - for $aa ( 0 .. $#closing_brace_names ) { - - if ( $current_depth[$aa] ) { - my $rsl = - $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; - my $sl = $$rsl[0]; - my $msg = <<"EOM"; -Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] -The most recent un-matched $opening_brace_names[$aa] is on line $sl -EOM - indicate_error( $msg, @$rsl, '^' ); - increment_brace_error(); - } - } -} - -#########i############################################################# -# Tokenizer routines for looking ahead in input stream -####################################################################### - -sub peek_ahead_for_n_nonblank_pre_tokens { - - # returns next n pretokens if they exist - # returns undef's if hits eof without seeing any pretokens - # USES GLOBAL VARIABLES: $tokenizer_self - my $max_pretokens = shift; - my $line; - my $i = 0; - my ( $rpre_tokens, $rmap, $rpre_types ); - - while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) - { - $line =~ s/^\s*//; # trim leading blanks - next if ( length($line) <= 0 ); # skip blank - next if ( $line =~ /^#/ ); # skip comment - ( $rpre_tokens, $rmap, $rpre_types ) = - pre_tokenize( $line, $max_pretokens ); - last; - } - return ( $rpre_tokens, $rpre_types ); -} - -# look ahead for next non-blank, non-comment line of code -sub peek_ahead_for_nonblank_token { - - # USES GLOBAL VARIABLES: $tokenizer_self - my ( $rtokens, $max_token_index ) = @_; - my $line; - my $i = 0; - - while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) - { - $line =~ s/^\s*//; # trim leading blanks - next if ( length($line) <= 0 ); # skip blank - next if ( $line =~ /^#/ ); # skip comment - my ( $rtok, $rmap, $rtype ) = - pre_tokenize( $line, 2 ); # only need 2 pre-tokens - my $j = $max_token_index + 1; - my $tok; - - foreach $tok (@$rtok) { - last if ( $tok =~ "\n" ); - $$rtokens[ ++$j ] = $tok; - } - last; - } - return $rtokens; -} - -#########i############################################################# -# Tokenizer guessing routines for ambiguous situations -####################################################################### - -sub guess_if_pattern_or_conditional { - - # this routine is called when we have encountered a ? following an - # unknown bareword, and we must decide if it starts a pattern or not - # input parameters: - # $i - token index of the ? starting possible pattern - # output parameters: - # $is_pattern = 0 if probably not pattern, =1 if probably a pattern - # msg = a warning or diagnostic message - # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; - my $is_pattern = 0; - my $msg = "guessing that ? after $last_nonblank_token starts a "; - - if ( $i >= $max_token_index ) { - $msg .= "conditional (no end to pattern found on the line)\n"; - } - else { - my $ibeg = $i; - $i = $ibeg + 1; - my $next_token = $$rtokens[$i]; # first token after ? - - # look for a possible ending ? on this line.. - my $in_quote = 1; - my $quote_depth = 0; - my $quote_character = ''; - my $quote_pos = 0; - my $quoted_string; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - - if ($in_quote) { - - # we didn't find an ending ? on this line, - # so we bias towards conditional - $is_pattern = 0; - $msg .= "conditional (no ending ? on this line)\n"; - - # we found an ending ?, so we bias towards a pattern - } - else { - - if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { - $is_pattern = 1; - $msg .= "pattern (found ending ? and pattern expected)\n"; - } - else { - $msg .= "pattern (uncertain, but found ending ?)\n"; - } - } - } - return ( $is_pattern, $msg ); -} - -sub guess_if_pattern_or_division { - - # this routine is called when we have encountered a / following an - # unknown bareword, and we must decide if it starts a pattern or is a - # division - # input parameters: - # $i - token index of the / starting possible pattern - # output parameters: - # $is_pattern = 0 if probably division, =1 if probably a pattern - # msg = a warning or diagnostic message - # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; - my $is_pattern = 0; - my $msg = "guessing that / after $last_nonblank_token starts a "; - - if ( $i >= $max_token_index ) { - "division (no end to pattern found on the line)\n"; - } - else { - my $ibeg = $i; - my $divide_expected = - numerator_expected( $i, $rtokens, $max_token_index ); - $i = $ibeg + 1; - my $next_token = $$rtokens[$i]; # first token after slash - - # look for a possible ending / on this line.. - my $in_quote = 1; - my $quote_depth = 0; - my $quote_character = ''; - my $quote_pos = 0; - my $quoted_string; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - - if ($in_quote) { - - # we didn't find an ending / on this line, - # so we bias towards division - if ( $divide_expected >= 0 ) { - $is_pattern = 0; - $msg .= "division (no ending / on this line)\n"; - } - else { - $msg = "multi-line pattern (division not possible)\n"; - $is_pattern = 1; - } - - } - - # we found an ending /, so we bias towards a pattern - else { - - if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { - - if ( $divide_expected >= 0 ) { - - if ( $i - $ibeg > 60 ) { - $msg .= "division (matching / too distant)\n"; - $is_pattern = 0; - } - else { - $msg .= "pattern (but division possible too)\n"; - $is_pattern = 1; - } - } - else { - $is_pattern = 1; - $msg .= "pattern (division not possible)\n"; - } - } - else { - - if ( $divide_expected >= 0 ) { - $is_pattern = 0; - $msg .= "division (pattern not possible)\n"; - } - else { - $is_pattern = 1; - $msg .= - "pattern (uncertain, but division would not work here)\n"; - } - } - } - } - return ( $is_pattern, $msg ); -} - -# try to resolve here-doc vs. shift by looking ahead for -# non-code or the end token (currently only looks for end token) -# returns 1 if it is probably a here doc, 0 if not -sub guess_if_here_doc { - - # This is how many lines we will search for a target as part of the - # guessing strategy. It is a constant because there is probably - # little reason to change it. - # USES GLOBAL VARIABLES: $tokenizer_self, $current_package - # %is_constant, - use constant HERE_DOC_WINDOW => 40; - - my $next_token = shift; - my $here_doc_expected = 0; - my $line; - my $k = 0; - my $msg = "checking <<"; - - while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) - { - chomp $line; - - if ( $line =~ /^$next_token$/ ) { - $msg .= " -- found target $next_token ahead $k lines\n"; - $here_doc_expected = 1; # got it - last; - } - last if ( $k >= HERE_DOC_WINDOW ); - } - - unless ($here_doc_expected) { - - if ( !defined($line) ) { - $here_doc_expected = -1; # hit eof without seeing target - $msg .= " -- must be shift; target $next_token not in file\n"; - - } - else { # still unsure..taking a wild guess - - if ( !$is_constant{$current_package}{$next_token} ) { - $here_doc_expected = 1; - $msg .= - " -- guessing it's a here-doc ($next_token not a constant)\n"; - } - else { - $msg .= - " -- guessing it's a shift ($next_token is a constant)\n"; - } - } - } - write_logfile_entry($msg); - return $here_doc_expected; -} - -#########i############################################################# -# Tokenizer Routines for scanning identifiers and related items -####################################################################### - -sub scan_bare_identifier_do { - - # this routine is called to scan a token starting with an alphanumeric - # variable or package separator, :: or '. - # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, - # $last_nonblank_type,@paren_type, $paren_depth - - my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, - $max_token_index ) - = @_; - my $i_begin = $i; - my $package = undef; - - my $i_beg = $i; - - # we have to back up one pretoken at a :: since each : is one pretoken - if ( $tok eq '::' ) { $i_beg-- } - if ( $tok eq '->' ) { $i_beg-- } - my $pos_beg = $$rtoken_map[$i_beg]; - pos($input_line) = $pos_beg; - - # Examples: - # A::B::C - # A:: - # ::A - # A'B - if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { - - my $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $tok = substr( $input_line, $pos_beg, $numc ); - - # type 'w' includes anything without leading type info - # ($,%,@,*) including something like abc::def::ghi - $type = 'w'; - - my $sub_name = ""; - if ( defined($2) ) { $sub_name = $2; } - if ( defined($1) ) { - $package = $1; - - # patch: don't allow isolated package name which just ends - # in the old style package separator (single quote). Example: - # use CGI':all'; - if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { - $pos--; - } - - $package =~ s/\'/::/g; - if ( $package =~ /^\:/ ) { $package = 'main' . $package } - $package =~ s/::$//; - } - else { - $package = $current_package; - - if ( $is_keyword{$tok} ) { - $type = 'k'; - } - } - - # if it is a bareword.. - if ( $type eq 'w' ) { - - # check for v-string with leading 'v' type character - # (This seems to have presidence over filehandle, type 'Y') - if ( $tok =~ /^v\d[_\d]*$/ ) { - - # we only have the first part - something like 'v101' - - # look for more - if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { - $pos = pos($input_line); - $numc = $pos - $pos_beg; - $tok = substr( $input_line, $pos_beg, $numc ); - } - $type = 'v'; - - # warn if this version can't handle v-strings - report_v_string($tok); - } - - elsif ( $is_constant{$package}{$sub_name} ) { - $type = 'C'; - } - - # bareword after sort has implied empty prototype; for example: - # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); - # This has priority over whatever the user has specified. - elsif ($last_nonblank_token eq 'sort' - && $last_nonblank_type eq 'k' ) - { - $type = 'Z'; - } - - # Note: strangely, perl does not seem to really let you create - # functions which act like eval and do, in the sense that eval - # and do may have operators following the final }, but any operators - # that you create with prototype (&) apparently do not allow - # trailing operators, only terms. This seems strange. - # If this ever changes, here is the update - # to make perltidy behave accordingly: - - # elsif ( $is_block_function{$package}{$tok} ) { - # $tok='eval'; # patch to do braces like eval - doesn't work - # $type = 'k'; - #} - # FIXME: This could become a separate type to allow for different - # future behavior: - elsif ( $is_block_function{$package}{$sub_name} ) { - $type = 'G'; - } - - elsif ( $is_block_list_function{$package}{$sub_name} ) { - $type = 'G'; - } - elsif ( $is_user_function{$package}{$sub_name} ) { - $type = 'U'; - $prototype = $user_function_prototype{$package}{$sub_name}; - } - - # check for indirect object - elsif ( - - # added 2001-03-27: must not be followed immediately by '(' - # see fhandle.t - ( $input_line !~ m/\G\(/gc ) - - # and - && ( - - # preceded by keyword like 'print', 'printf' and friends - $is_indirect_object_taker{$last_nonblank_token} - - # or preceded by something like 'print(' or 'printf(' - || ( - ( $last_nonblank_token eq '(' ) - && $is_indirect_object_taker{ $paren_type[$paren_depth] - } - - ) - ) - ) - { - - # may not be indirect object unless followed by a space - if ( $input_line =~ m/\G\s+/gc ) { - $type = 'Y'; - - # Abandon Hope ... - # Perl's indirect object notation is a very bad - # thing and can cause subtle bugs, especially for - # beginning programmers. And I haven't even been - # able to figure out a sane warning scheme which - # doesn't get in the way of good scripts. - - # Complain if a filehandle has any lower case - # letters. This is suggested good practice. - # Use 'sub_name' because something like - # main::MYHANDLE is ok for filehandle - if ( $sub_name =~ /[a-z]/ ) { - - # could be bug caused by older perltidy if - # followed by '(' - if ( $input_line =~ m/\G\s*\(/gc ) { - complain( -"Caution: unknown word '$tok' in indirect object slot\n" - ); - } - } - } - - # bareword not followed by a space -- may not be filehandle - # (may be function call defined in a 'use' statement) - else { - $type = 'Z'; - } - } - } - - # Now we must convert back from character position - # to pre_token index. - # I don't think an error flag can occur here ..but who knows - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - if ($error) { - warning("scan_bare_identifier: Possibly invalid tokenization\n"); - } - } - - # no match but line not blank - could be syntax error - # perl will take '::' alone without complaint - else { - $type = 'w'; - - # change this warning to log message if it becomes annoying - warning("didn't find identifier after leading ::\n"); - } - return ( $i, $tok, $type, $prototype ); -} - -sub scan_id_do { - -# This is the new scanner and will eventually replace scan_identifier. -# Only type 'sub' and 'package' are implemented. -# Token types $ * % @ & -> are not yet implemented. -# -# Scan identifier following a type token. -# The type of call depends on $id_scan_state: $id_scan_state = '' -# for starting call, in which case $tok must be the token defining -# the type. -# -# If the type token is the last nonblank token on the line, a value -# of $id_scan_state = $tok is returned, indicating that further -# calls must be made to get the identifier. If the type token is -# not the last nonblank token on the line, the identifier is -# scanned and handled and a value of '' is returned. -# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, -# $statement_type, $tokenizer_self - - my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, - $max_token_index ) - = @_; - my $type = ''; - my ( $i_beg, $pos_beg ); - - #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; - #my ($a,$b,$c) = caller; - #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; - - # on re-entry, start scanning at first token on the line - if ($id_scan_state) { - $i_beg = $i; - $type = ''; - } - - # on initial entry, start scanning just after type token - else { - $i_beg = $i + 1; - $id_scan_state = $tok; - $type = 't'; - } - - # find $i_beg = index of next nonblank token, - # and handle empty lines - my $blank_line = 0; - my $next_nonblank_token = $$rtokens[$i_beg]; - if ( $i_beg > $max_token_index ) { - $blank_line = 1; - } - else { - - # only a '#' immediately after a '$' is not a comment - if ( $next_nonblank_token eq '#' ) { - unless ( $tok eq '$' ) { - $blank_line = 1; - } - } - - if ( $next_nonblank_token =~ /^\s/ ) { - ( $next_nonblank_token, $i_beg ) = - find_next_nonblank_token_on_this_line( $i_beg, $rtokens, - $max_token_index ); - if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { - $blank_line = 1; - } - } - } - - # handle non-blank line; identifier, if any, must follow - unless ($blank_line) { - - if ( $id_scan_state eq 'sub' ) { - ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( - $input_line, $i, $i_beg, - $tok, $type, $rtokens, - $rtoken_map, $id_scan_state, $max_token_index - ); - } - - elsif ( $id_scan_state eq 'package' ) { - ( $i, $tok, $type ) = - do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, - $rtoken_map, $max_token_index ); - $id_scan_state = ''; - } - - else { - warning("invalid token in scan_id: $tok\n"); - $id_scan_state = ''; - } - } - - if ( $id_scan_state && ( !defined($type) || !$type ) ) { - - # shouldn't happen: - warning( -"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" - ); - report_definite_bug(); - } - - TOKENIZER_DEBUG_FLAG_NSCAN && do { - print - "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; - }; - return ( $i, $tok, $type, $id_scan_state ); -} - -sub check_prototype { - my ( $proto, $package, $subname ) = @_; - return unless ( defined($package) && defined($subname) ); - if ( defined($proto) ) { - $proto =~ s/^\s*\(\s*//; - $proto =~ s/\s*\)$//; - if ($proto) { - $is_user_function{$package}{$subname} = 1; - $user_function_prototype{$package}{$subname} = "($proto)"; - - # prototypes containing '&' must be treated specially.. - if ( $proto =~ /\&/ ) { - - # right curly braces of prototypes ending in - # '&' may be followed by an operator - if ( $proto =~ /\&$/ ) { - $is_block_function{$package}{$subname} = 1; - } - - # right curly braces of prototypes NOT ending in - # '&' may NOT be followed by an operator - elsif ( $proto !~ /\&$/ ) { - $is_block_list_function{$package}{$subname} = 1; - } - } - } - else { - $is_constant{$package}{$subname} = 1; - } - } - else { - $is_user_function{$package}{$subname} = 1; - } -} - -sub do_scan_package { - - # do_scan_package parses a package name - # it is called with $i_beg equal to the index of the first nonblank - # token following a 'package' token. - # USES GLOBAL VARIABLES: $current_package, - - my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, - $max_token_index ) - = @_; - my $package = undef; - my $pos_beg = $$rtoken_map[$i_beg]; - pos($input_line) = $pos_beg; - - # handle non-blank line; package name, if any, must follow - if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { - $package = $1; - $package = ( defined($1) && $1 ) ? $1 : 'main'; - $package =~ s/\'/::/g; - if ( $package =~ /^\:/ ) { $package = 'main' . $package } - $package =~ s/::$//; - my $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); - $type = 'i'; - - # Now we must convert back from character position - # to pre_token index. - # I don't think an error flag can occur here ..but ? - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - if ($error) { warning("Possibly invalid package\n") } - $current_package = $package; - - # check for error - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token !~ /^[;\}]$/ ) { - warning( - "Unexpected '$next_nonblank_token' after package name '$tok'\n" - ); - } - } - - # no match but line not blank -- - # could be a label with name package, like package: , for example. - else { - $type = 'k'; - } - - return ( $i, $tok, $type ); -} - -sub scan_identifier_do { - - # This routine assembles tokens into identifiers. It maintains a - # scan state, id_scan_state. It updates id_scan_state based upon - # current id_scan_state and token, and returns an updated - # id_scan_state and the next index after the identifier. - # USES GLOBAL VARIABLES: $context, $last_nonblank_token, - # $last_nonblank_type - - my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, - $expecting ) - = @_; - my $i_begin = $i; - my $type = ''; - my $tok_begin = $$rtokens[$i_begin]; - if ( $tok_begin eq ':' ) { $tok_begin = '::' } - my $id_scan_state_begin = $id_scan_state; - my $identifier_begin = $identifier; - my $tok = $tok_begin; - my $message = ""; - - # these flags will be used to help figure out the type: - my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); - my $saw_type; - - # allow old package separator (') except in 'use' statement - my $allow_tick = ( $last_nonblank_token ne 'use' ); - - # get started by defining a type and a state if necessary - unless ($id_scan_state) { - $context = UNKNOWN_CONTEXT; - - # fixup for digraph - if ( $tok eq '>' ) { - $tok = '->'; - $tok_begin = $tok; - } - $identifier = $tok; - - if ( $tok eq '$' || $tok eq '*' ) { - $id_scan_state = '$'; - $context = SCALAR_CONTEXT; - } - elsif ( $tok eq '%' || $tok eq '@' ) { - $id_scan_state = '$'; - $context = LIST_CONTEXT; - } - elsif ( $tok eq '&' ) { - $id_scan_state = '&'; - } - elsif ( $tok eq 'sub' or $tok eq 'package' ) { - $saw_alpha = 0; # 'sub' is considered type info here - $id_scan_state = '$'; - $identifier .= ' '; # need a space to separate sub from sub name - } - elsif ( $tok eq '::' ) { - $id_scan_state = 'A'; - } - elsif ( $tok =~ /^[A-Za-z_]/ ) { - $id_scan_state = ':'; - } - elsif ( $tok eq '->' ) { - $id_scan_state = '$'; - } - else { - - # shouldn't happen - my ( $a, $b, $c ) = caller; - warning("Program Bug: scan_identifier given bad token = $tok \n"); - warning(" called from sub $a line: $c\n"); - report_definite_bug(); - } - $saw_type = !$saw_alpha; - } - else { - $i--; - $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); - } - - # now loop to gather the identifier - my $i_save = $i; - - while ( $i < $max_token_index ) { - $i_save = $i unless ( $tok =~ /^\s*$/ ); - $tok = $$rtokens[ ++$i ]; - - if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { - $tok = '::'; - $i++; - } - - if ( $id_scan_state eq '$' ) { # starting variable name - - if ( $tok eq '$' ) { - - $identifier .= $tok; - - # we've got a punctuation variable if end of line (punct.t) - if ( $i == $max_token_index ) { - $type = 'i'; - $id_scan_state = ''; - last; - } - } - elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. - $saw_alpha = 1; - $id_scan_state = ':'; # now need :: - $identifier .= $tok; - } - elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. - $saw_alpha = 1; - $id_scan_state = ':'; # now need :: - $identifier .= $tok; - - # Perl will accept leading digits in identifiers, - # although they may not always produce useful results. - # Something like $main::0 is ok. But this also works: - # - # sub howdy::123::bubba{ print "bubba $54321!\n" } - # howdy::123::bubba(); - # - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric - $saw_alpha = 1; - $id_scan_state = ':'; # now need :: - $identifier .= $tok; - } - elsif ( $tok eq '::' ) { - $id_scan_state = 'A'; - $identifier .= $tok; - } - elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array - $identifier .= $tok; # keep same state, a $ could follow - } - elsif ( $tok eq '{' ) { - - # check for something like ${#} or ${©} - if ( $identifier eq '$' - && $i + 2 <= $max_token_index - && $$rtokens[ $i + 2 ] eq '}' - && $$rtokens[ $i + 1 ] !~ /[\s\w]/ ) - { - my $next2 = $$rtokens[ $i + 2 ]; - my $next1 = $$rtokens[ $i + 1 ]; - $identifier .= $tok . $next1 . $next2; - $i += 2; - $id_scan_state = ''; - last; - } - - # skip something like ${xxx} or ->{ - $id_scan_state = ''; - - # if this is the first token of a line, any tokens for this - # identifier have already been accumulated - if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } - $i = $i_save; - last; - } - - # space ok after leading $ % * & @ - elsif ( $tok =~ /^\s*$/ ) { - - if ( $identifier =~ /^[\$\%\*\&\@]/ ) { - - if ( length($identifier) > 1 ) { - $id_scan_state = ''; - $i = $i_save; - $type = 'i'; # probably punctuation variable - last; - } - else { - - # spaces after $'s are common, and space after @ - # is harmless, so only complain about space - # after other type characters. Space after $ and - # @ will be removed in formatting. Report space - # after % and * because they might indicate a - # parsing error. In other words '% ' might be a - # modulo operator. Delete this warning if it - # gets annoying. - if ( $identifier !~ /^[\@\$]$/ ) { - $message = - "Space in identifier, following $identifier\n"; - } - } - } - - # else: - # space after '->' is ok - } - elsif ( $tok eq '^' ) { - - # check for some special variables like $^W - if ( $identifier =~ /^[\$\*\@\%]$/ ) { - $identifier .= $tok; - $id_scan_state = 'A'; - - # Perl accepts '$^]' or '@^]', but - # there must not be a space before the ']'. - my $next1 = $$rtokens[ $i + 1 ]; - if ( $next1 eq ']' ) { - $i++; - $identifier .= $next1; - $id_scan_state = ""; - last; - } - } - else { - $id_scan_state = ''; - } - } - else { # something else - - # check for various punctuation variables - if ( $identifier =~ /^[\$\*\@\%]$/ ) { - $identifier .= $tok; - } - - elsif ( $identifier eq '$#' ) { - - if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } - - # perl seems to allow just these: $#: $#- $#+ - elsif ( $tok =~ /^[\:\-\+]$/ ) { - $type = 'i'; - $identifier .= $tok; - } - else { - $i = $i_save; - write_logfile_entry( 'Use of $# is deprecated' . "\n" ); - } - } - elsif ( $identifier eq '$$' ) { - - # perl does not allow references to punctuation - # variables without braces. For example, this - # won't work: - # $:=\4; - # $a = $$:; - # You would have to use - # $a = ${$:}; - - $i = $i_save; - if ( $tok eq '{' ) { $type = 't' } - else { $type = 'i' } - } - elsif ( $identifier eq '->' ) { - $i = $i_save; - } - else { - $i = $i_save; - if ( length($identifier) == 1 ) { $identifier = ''; } - } - $id_scan_state = ''; - last; - } - } - elsif ( $id_scan_state eq '&' ) { # starting sub call? - - if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - $identifier .= $tok; - } - elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - $identifier .= $tok; - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - $identifier .= $tok; - } - elsif ( $tok =~ /^\s*$/ ) { # allow space - } - elsif ( $tok eq '::' ) { # leading :: - $id_scan_state = 'A'; # accept alpha next - $identifier .= $tok; - } - elsif ( $tok eq '{' ) { - if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } - $i = $i_save; - $id_scan_state = ''; - last; - } - else { - - # punctuation variable? - # testfile: cunningham4.pl - # - # We have to be careful here. If we are in an unknown state, - # we will reject the punctuation variable. In the following - # example the '&' is a binary opeator but we are in an unknown - # state because there is no sigil on 'Prima', so we don't - # know what it is. But it is a bad guess that - # '&~' is a punction variable. - # $self->{text}->{colorMap}->[ - # Prima::PodView::COLOR_CODE_FOREGROUND - # & ~tb::COLOR_INDEX ] = - # $sec->{ColorCode} - if ( $identifier eq '&' && $expecting ) { - $identifier .= $tok; - } - else { - $identifier = ''; - $i = $i_save; - $type = '&'; - } - $id_scan_state = ''; - last; - } - } - elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) - - if ( $tok =~ /^[A-Za-z_]/ ) { # found it - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok eq "'" && $allow_tick ) { - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { - $id_scan_state = '('; - $identifier .= $tok; - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { - $id_scan_state = ')'; - $identifier .= $tok; - } - else { - $id_scan_state = ''; - $i = $i_save; - last; - } - } - elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha - - if ( $tok eq '::' ) { # got it - $identifier .= $tok; - $id_scan_state = 'A'; # now require alpha - } - elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok eq "'" && $allow_tick ) { # tick - - if ( $is_keyword{$identifier} ) { - $id_scan_state = ''; # that's all - $i = $i_save; - } - else { - $identifier .= $tok; - } - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { - $id_scan_state = '('; - $identifier .= $tok; - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { - $id_scan_state = ')'; - $identifier .= $tok; - } - else { - $id_scan_state = ''; # that's all - $i = $i_save; - last; - } - } - elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype - - if ( $tok eq '(' ) { # got it - $identifier .= $tok; - $id_scan_state = ')'; # now find the end of it - } - elsif ( $tok =~ /^\s*$/ ) { # blank - keep going - $identifier .= $tok; - } - else { - $id_scan_state = ''; # that's all - no prototype - $i = $i_save; - last; - } - } - elsif ( $id_scan_state eq ')' ) { # looking for ) to end - - if ( $tok eq ')' ) { # got it - $identifier .= $tok; - $id_scan_state = ''; # all done - last; - } - elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { - $identifier .= $tok; - } - else { # probable error in script, but keep going - warning("Unexpected '$tok' while seeking end of prototype\n"); - $identifier .= $tok; - } - } - else { # can get here due to error in initialization - $id_scan_state = ''; - $i = $i_save; - last; - } - } - - if ( $id_scan_state eq ')' ) { - warning("Hit end of line while seeking ) to end prototype\n"); - } - - # once we enter the actual identifier, it may not extend beyond - # the end of the current line - if ( $id_scan_state =~ /^[A\:\(\)]/ ) { - $id_scan_state = ''; - } - if ( $i < 0 ) { $i = 0 } - - unless ($type) { - - if ($saw_type) { - - if ($saw_alpha) { - if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { - $type = 'w'; - } - else { $type = 'i' } - } - elsif ( $identifier eq '->' ) { - $type = '->'; - } - elsif ( - ( length($identifier) > 1 ) - - # In something like '@$=' we have an identifier '@$' - # In something like '$${' we have type '$$' (and only - # part of an identifier) - && !( $identifier =~ /\$$/ && $tok eq '{' ) - && ( $identifier !~ /^(sub |package )$/ ) - ) - { - $type = 'i'; - } - else { $type = 't' } - } - elsif ($saw_alpha) { - - # type 'w' includes anything without leading type info - # ($,%,@,*) including something like abc::def::ghi - $type = 'w'; - } - else { - $type = ''; - } # this can happen on a restart - } - - if ($identifier) { - $tok = $identifier; - if ($message) { write_logfile_entry($message) } - } - else { - $tok = $tok_begin; - $i = $i_begin; - } - - TOKENIZER_DEBUG_FLAG_SCAN_ID && do { - my ( $a, $b, $c ) = caller; - print -"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; - print -"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; - }; - return ( $i, $tok, $type, $id_scan_state, $identifier ); -} - -{ - - # saved package and subnames in case prototype is on separate line - my ( $package_saved, $subname_saved ); - - sub do_scan_sub { - - # do_scan_sub parses a sub name and prototype - # it is called with $i_beg equal to the index of the first nonblank - # token following a 'sub' token. - - # TODO: add future error checks to be sure we have a valid - # sub name. For example, 'sub &doit' is wrong. Also, be sure - # a name is given if and only if a non-anonymous sub is - # appropriate. - # USES GLOBAL VARS: $current_package, $last_nonblank_token, - # $in_attribute_list, %saw_function_definition, - # $statement_type - - my ( - $input_line, $i, $i_beg, - $tok, $type, $rtokens, - $rtoken_map, $id_scan_state, $max_token_index - ) = @_; - $id_scan_state = ""; # normally we get everything in one call - my $subname = undef; - my $package = undef; - my $proto = undef; - my $attrs = undef; - my $match; - - my $pos_beg = $$rtoken_map[$i_beg]; - pos($input_line) = $pos_beg; - - # sub NAME PROTO ATTRS - if ( - $input_line =~ m/\G\s* - ((?:\w*(?:'|::))*) # package - something that ends in :: or ' - (\w+) # NAME - required - (\s*\([^){]*\))? # PROTO - something in parens - (\s*:)? # ATTRS - leading : of attribute list - /gcx - ) - { - $match = 1; - $subname = $2; - $proto = $3; - $attrs = $4; - - $package = ( defined($1) && $1 ) ? $1 : $current_package; - $package =~ s/\'/::/g; - if ( $package =~ /^\:/ ) { $package = 'main' . $package } - $package =~ s/::$//; - my $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); - $type = 'i'; - } - - # Look for prototype/attributes not preceded on this line by subname; - # This might be an anonymous sub with attributes, - # or a prototype on a separate line from its sub name - elsif ( - $input_line =~ m/\G(\s*\([^){]*\))? # PROTO - (\s*:)? # ATTRS leading ':' - /gcx - && ( $1 || $2 ) - ) - { - $match = 1; - $proto = $1; - $attrs = $2; - - # Handle prototype on separate line from subname - if ($subname_saved) { - $package = $package_saved; - $subname = $subname_saved; - $tok = $last_nonblank_token; - } - $type = 'i'; - } - - if ($match) { - - # ATTRS: if there are attributes, back up and let the ':' be - # found later by the scanner. - my $pos = pos($input_line); - if ($attrs) { - $pos -= length($attrs); - } - - my $next_nonblank_token = $tok; - - # catch case of line with leading ATTR ':' after anonymous sub - if ( $pos == $pos_beg && $tok eq ':' ) { - $type = 'A'; - $in_attribute_list = 1; - } - - # We must convert back from character position - # to pre_token index. - else { - - # I don't think an error flag can occur here ..but ? - my $error; - ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, - $max_token_index ); - if ($error) { warning("Possibly invalid sub\n") } - - # check for multiple definitions of a sub - ( $next_nonblank_token, my $i_next ) = - find_next_nonblank_token_on_this_line( $i, $rtokens, - $max_token_index ); - } - - if ( $next_nonblank_token =~ /^(\s*|#)$/ ) - { # skip blank or side comment - my ( $rpre_tokens, $rpre_types ) = - peek_ahead_for_n_nonblank_pre_tokens(1); - if ( defined($rpre_tokens) && @$rpre_tokens ) { - $next_nonblank_token = $rpre_tokens->[0]; - } - else { - $next_nonblank_token = '}'; - } - } - $package_saved = ""; - $subname_saved = ""; - if ( $next_nonblank_token eq '{' ) { - if ($subname) { - - # Check for multiple definitions of a sub, but - # it is ok to have multiple sub BEGIN, etc, - # so we do not complain if name is all caps - if ( $saw_function_definition{$package}{$subname} - && $subname !~ /^[A-Z]+$/ ) - { - my $lno = $saw_function_definition{$package}{$subname}; - warning( -"already saw definition of 'sub $subname' in package '$package' at line $lno\n" - ); - } - $saw_function_definition{$package}{$subname} = - $tokenizer_self->{_last_line_number}; - } - } - elsif ( $next_nonblank_token eq ';' ) { - } - elsif ( $next_nonblank_token eq '}' ) { - } - - # ATTRS - if an attribute list follows, remember the name - # of the sub so the next opening brace can be labeled. - # Setting 'statement_type' causes any ':'s to introduce - # attributes. - elsif ( $next_nonblank_token eq ':' ) { - $statement_type = $tok; - } - - # see if PROTO follows on another line: - elsif ( $next_nonblank_token eq '(' ) { - if ( $attrs || $proto ) { - warning( -"unexpected '(' after definition or declaration of sub '$subname'\n" - ); - } - else { - $id_scan_state = 'sub'; # we must come back to get proto - $statement_type = $tok; - $package_saved = $package; - $subname_saved = $subname; - } - } - elsif ($next_nonblank_token) { # EOF technically ok - warning( -"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" - ); - } - check_prototype( $proto, $package, $subname ); - } - - # no match but line not blank - else { - } - return ( $i, $tok, $type, $id_scan_state ); - } -} - -#########i############################################################### -# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS -######################################################################### - -sub find_next_nonblank_token { - my ( $i, $rtokens, $max_token_index ) = @_; - - if ( $i >= $max_token_index ) { - if ( !peeked_ahead() ) { - peeked_ahead(1); - $rtokens = - peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); - } - } - my $next_nonblank_token = $$rtokens[ ++$i ]; - - if ( $next_nonblank_token =~ /^\s*$/ ) { - $next_nonblank_token = $$rtokens[ ++$i ]; - } - return ( $next_nonblank_token, $i ); -} - -sub numerator_expected { - - # this is a filter for a possible numerator, in support of guessing - # for the / pattern delimiter token. - # returns - - # 1 - yes - # 0 - can't tell - # -1 - no - # Note: I am using the convention that variables ending in - # _expected have these 3 possible values. - my ( $i, $rtokens, $max_token_index ) = @_; - my $next_token = $$rtokens[ $i + 1 ]; - if ( $next_token eq '=' ) { $i++; } # handle /= - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { - 1; - } - else { - - if ( $next_nonblank_token =~ /^\s*$/ ) { - 0; - } - else { - -1; - } - } -} - -sub pattern_expected { - - # This is the start of a filter for a possible pattern. - # It looks at the token after a possbible pattern and tries to - # determine if that token could end a pattern. - # returns - - # 1 - yes - # 0 - can't tell - # -1 - no - my ( $i, $rtokens, $max_token_index ) = @_; - my $next_token = $$rtokens[ $i + 1 ]; - if ( $next_token =~ /^[cgimosxp]/ ) { $i++; } # skip possible modifier - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # list of tokens which may follow a pattern - # (can probably be expanded) - if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) - { - 1; - } - else { - - if ( $next_nonblank_token =~ /^\s*$/ ) { - 0; - } - else { - -1; - } - } -} - -sub find_next_nonblank_token_on_this_line { - my ( $i, $rtokens, $max_token_index ) = @_; - my $next_nonblank_token; - - if ( $i < $max_token_index ) { - $next_nonblank_token = $$rtokens[ ++$i ]; - - if ( $next_nonblank_token =~ /^\s*$/ ) { - - if ( $i < $max_token_index ) { - $next_nonblank_token = $$rtokens[ ++$i ]; - } - } - } - else { - $next_nonblank_token = ""; - } - return ( $next_nonblank_token, $i ); -} - -sub find_angle_operator_termination { - - # We are looking at a '<' and want to know if it is an angle operator. - # We are to return: - # $i = pretoken index of ending '>' if found, current $i otherwise - # $type = 'Q' if found, '>' otherwise - my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; - my $i = $i_beg; - my $type = '<'; - pos($input_line) = 1 + $$rtoken_map[$i]; - - my $filter; - - # we just have to find the next '>' if a term is expected - if ( $expecting == TERM ) { $filter = '[\>]' } - - # we have to guess if we don't know what is expected - elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } - - # shouldn't happen - we shouldn't be here if operator is expected - else { warning("Program Bug in find_angle_operator_termination\n") } - - # To illustrate what we might be looking at, in case we are - # guessing, here are some examples of valid angle operators - # (or file globs): - # - # - # <$fh> - # <*.c *.h> - # <_> - # ( glob.t) - # <${PREFIX}*img*.$IMAGE_TYPE> - # - # - # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> - # - # Here are some examples of lines which do not have angle operators: - # return undef unless $self->[2]++ < $#{$self->[1]}; - # < 2 || @$t > - # - # the following line from dlister.pl caused trouble: - # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; - # - # If the '<' starts an angle operator, it must end on this line and - # it must not have certain characters like ';' and '=' in it. I use - # this to limit the testing. This filter should be improved if - # possible. - - if ( $input_line =~ /($filter)/g ) { - - if ( $1 eq '>' ) { - - # We MAY have found an angle operator termination if we get - # here, but we need to do more to be sure we haven't been - # fooled. - my $pos = pos($input_line); - - my $pos_beg = $$rtoken_map[$i]; - my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); - - # Reject if the closing '>' follows a '-' as in: - # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { } - if ( $expecting eq UNKNOWN ) { - my $check = substr( $input_line, $pos - 2, 1 ); - if ( $check eq '-' ) { - return ( $i, $type ); - } - } - - ######################################debug##### - #write_diagnostics( "ANGLE? :$str\n"); - #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; - ######################################debug##### - $type = 'Q'; - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - - # It may be possible that a quote ends midway in a pretoken. - # If this happens, it may be necessary to split the pretoken. - if ($error) { - warning( - "Possible tokinization error..please check this line\n"); - report_possible_bug(); - } - - # Now let's see where we stand.... - # OK if math op not possible - if ( $expecting == TERM ) { - } - - # OK if there are no more than 2 pre-tokens inside - # (not possible to write 2 token math between < and >) - # This catches most common cases - elsif ( $i <= $i_beg + 3 ) { - write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); - } - - # Not sure.. - else { - - # Let's try a Brace Test: any braces inside must balance - my $br = 0; - while ( $str =~ /\{/g ) { $br++ } - while ( $str =~ /\}/g ) { $br-- } - my $sb = 0; - while ( $str =~ /\[/g ) { $sb++ } - while ( $str =~ /\]/g ) { $sb-- } - my $pr = 0; - while ( $str =~ /\(/g ) { $pr++ } - while ( $str =~ /\)/g ) { $pr-- } - - # if braces do not balance - not angle operator - if ( $br || $sb || $pr ) { - $i = $i_beg; - $type = '<'; - write_diagnostics( - "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); - } - - # we should keep doing more checks here...to be continued - # Tentatively accepting this as a valid angle operator. - # There are lots more things that can be checked. - else { - write_diagnostics( - "ANGLE-Guessing yes: $str expecting=$expecting\n"); - write_logfile_entry("Guessing angle operator here: $str\n"); - } - } - } - - # didn't find ending > - else { - if ( $expecting == TERM ) { - warning("No ending > for angle operator\n"); - } - } - } - return ( $i, $type ); -} - -sub scan_number_do { - - # scan a number in any of the formats that Perl accepts - # Underbars (_) are allowed in decimal numbers. - # input parameters - - # $input_line - the string to scan - # $i - pre_token index to start scanning - # $rtoken_map - reference to the pre_token map giving starting - # character position in $input_line of token $i - # output parameters - - # $i - last pre_token index of the number just scanned - # number - the number (characters); or undef if not a number - - my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; - my $pos_beg = $$rtoken_map[$i]; - my $pos; - my $i_begin = $i; - my $number = undef; - my $type = $input_type; - - my $first_char = substr( $input_line, $pos_beg, 1 ); - - # Look for bad starting characters; Shouldn't happen.. - if ( $first_char !~ /[\d\.\+\-Ee]/ ) { - warning("Program bug - scan_number given character $first_char\n"); - report_definite_bug(); - return ( $i, $type, $number ); - } - - # handle v-string without leading 'v' character ('Two Dot' rule) - # (vstring.t) - # TODO: v-strings may contain underscores - pos($input_line) = $pos_beg; - if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { - $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $number = substr( $input_line, $pos_beg, $numc ); - $type = 'v'; - report_v_string($number); - } - - # handle octal, hex, binary - if ( !defined($number) ) { - pos($input_line) = $pos_beg; - if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g ) - { - $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $number = substr( $input_line, $pos_beg, $numc ); - $type = 'n'; - } - } - - # handle decimal - if ( !defined($number) ) { - pos($input_line) = $pos_beg; - - if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { - $pos = pos($input_line); - - # watch out for things like 0..40 which would give 0. by this; - if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) - && ( substr( $input_line, $pos, 1 ) eq '.' ) ) - { - $pos--; - } - my $numc = $pos - $pos_beg; - $number = substr( $input_line, $pos_beg, $numc ); - $type = 'n'; - } - } - - # filter out non-numbers like e + - . e2 .e3 +e6 - # the rule: at least one digit, and any 'e' must be preceded by a digit - if ( - $number !~ /\d/ # no digits - || ( $number =~ /^(.*)[eE]/ - && $1 !~ /\d/ ) # or no digits before the 'e' - ) - { - $number = undef; - $type = $input_type; - return ( $i, $type, $number ); - } - - # Found a number; now we must convert back from character position - # to pre_token index. An error here implies user syntax error. - # An example would be an invalid octal number like '009'. - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - if ($error) { warning("Possibly invalid number\n") } - - return ( $i, $type, $number ); -} - -sub inverse_pretoken_map { - - # Starting with the current pre_token index $i, scan forward until - # finding the index of the next pre_token whose position is $pos. - my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; - my $error = 0; - - while ( ++$i <= $max_token_index ) { - - if ( $pos <= $$rtoken_map[$i] ) { - - # Let the calling routine handle errors in which we do not - # land on a pre-token boundary. It can happen by running - # perltidy on some non-perl scripts, for example. - if ( $pos < $$rtoken_map[$i] ) { $error = 1 } - $i--; - last; - } - } - return ( $i, $error ); -} - -sub find_here_doc { - - # find the target of a here document, if any - # input parameters: - # $i - token index of the second < of << - # ($i must be less than the last token index if this is called) - # output parameters: - # $found_target = 0 didn't find target; =1 found target - # HERE_TARGET - the target string (may be empty string) - # $i - unchanged if not here doc, - # or index of the last token of the here target - # $saw_error - flag noting unbalanced quote on here target - my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; - my $ibeg = $i; - my $found_target = 0; - my $here_doc_target = ''; - my $here_quote_character = ''; - my $saw_error = 0; - my ( $next_nonblank_token, $i_next_nonblank, $next_token ); - $next_token = $$rtokens[ $i + 1 ]; - - # perl allows a backslash before the target string (heredoc.t) - my $backslash = 0; - if ( $next_token eq '\\' ) { - $backslash = 1; - $next_token = $$rtokens[ $i + 2 ]; - } - - ( $next_nonblank_token, $i_next_nonblank ) = - find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); - - if ( $next_nonblank_token =~ /[\'\"\`]/ ) { - - my $in_quote = 1; - my $quote_depth = 0; - my $quote_pos = 0; - my $quoted_string; - - ( - $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, - $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); - - if ($in_quote) { # didn't find end of quote, so no target found - $i = $ibeg; - if ( $expecting == TERM ) { - warning( -"Did not find here-doc string terminator ($here_quote_character) before end of line \n" - ); - $saw_error = 1; - } - } - else { # found ending quote - my $j; - $found_target = 1; - - my $tokj; - for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { - $tokj = $$rtokens[$j]; - - # we have to remove any backslash before the quote character - # so that the here-doc-target exactly matches this string - next - if ( $tokj eq "\\" - && $j < $i - 1 - && $$rtokens[ $j + 1 ] eq $here_quote_character ); - $here_doc_target .= $tokj; - } - } - } - - elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { - $found_target = 1; - write_logfile_entry( - "found blank here-target after <<; suggest using \"\"\n"); - $i = $ibeg; - } - elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << - - my $here_doc_expected; - if ( $expecting == UNKNOWN ) { - $here_doc_expected = guess_if_here_doc($next_token); - } - else { - $here_doc_expected = 1; - } - - if ($here_doc_expected) { - $found_target = 1; - $here_doc_target = $next_token; - $i = $ibeg + 1; - } - - } - else { - - if ( $expecting == TERM ) { - $found_target = 1; - write_logfile_entry("Note: bare here-doc operator <<\n"); - } - else { - $i = $ibeg; - } - } - - # patch to neglect any prepended backslash - if ( $found_target && $backslash ) { $i++ } - - return ( $found_target, $here_doc_target, $here_quote_character, $i, - $saw_error ); -} - -sub do_quote { - - # follow (or continue following) quoted string(s) - # $in_quote return code: - # 0 - ok, found end - # 1 - still must find end of quote whose target is $quote_character - # 2 - still looking for end of first of two quotes - # - # Returns updated strings: - # $quoted_string_1 = quoted string seen while in_quote=1 - # $quoted_string_2 = quoted string seen while in_quote=2 - my ( - $i, $in_quote, $quote_character, - $quote_pos, $quote_depth, $quoted_string_1, - $quoted_string_2, $rtokens, $rtoken_map, - $max_token_index - ) = @_; - - my $in_quote_starting = $in_quote; - - my $quoted_string; - if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow - my $ibeg = $i; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - $quoted_string_2 .= $quoted_string; - if ( $in_quote == 1 ) { - if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } - $quote_character = ''; - } - else { - $quoted_string_2 .= "\n"; - } - } - - if ( $in_quote == 1 ) { # one (more) quote to follow - my $ibeg = $i; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - $quoted_string_1 .= $quoted_string; - if ( $in_quote == 1 ) { - $quoted_string_1 .= "\n"; - } - } - return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2 ); -} - -sub follow_quoted_string { - - # scan for a specific token, skipping escaped characters - # if the quote character is blank, use the first non-blank character - # input parameters: - # $rtokens = reference to the array of tokens - # $i = the token index of the first character to search - # $in_quote = number of quoted strings being followed - # $beginning_tok = the starting quote character - # $quote_pos = index to check next for alphanumeric delimiter - # output parameters: - # $i = the token index of the ending quote character - # $in_quote = decremented if found end, unchanged if not - # $beginning_tok = the starting quote character - # $quote_pos = index to check next for alphanumeric delimiter - # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. - # $quoted_string = the text of the quote (without quotation tokens) - my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, - $max_token_index ) - = @_; - my ( $tok, $end_tok ); - my $i = $i_beg - 1; - my $quoted_string = ""; - - TOKENIZER_DEBUG_FLAG_QUOTE && do { - print -"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; - }; - - # get the corresponding end token - if ( $beginning_tok !~ /^\s*$/ ) { - $end_tok = matching_end_token($beginning_tok); - } - - # a blank token means we must find and use the first non-blank one - else { - my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a - - while ( $i < $max_token_index ) { - $tok = $$rtokens[ ++$i ]; - - if ( $tok !~ /^\s*$/ ) { - - if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { - $i = $max_token_index; - } - else { - - if ( length($tok) > 1 ) { - if ( $quote_pos <= 0 ) { $quote_pos = 1 } - $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); - } - else { - $beginning_tok = $tok; - $quote_pos = 0; - } - $end_tok = matching_end_token($beginning_tok); - $quote_depth = 1; - last; - } - } - else { - $allow_quote_comments = 1; - } - } - } - - # There are two different loops which search for the ending quote - # character. In the rare case of an alphanumeric quote delimiter, we - # have to look through alphanumeric tokens character-by-character, since - # the pre-tokenization process combines multiple alphanumeric - # characters, whereas for a non-alphanumeric delimiter, only tokens of - # length 1 can match. - - ################################################################### - # Case 1 (rare): loop for case of alphanumeric quote delimiter.. - # "quote_pos" is the position the current word to begin searching - ################################################################### - if ( $beginning_tok =~ /\w/ ) { - - # Note this because it is not recommended practice except - # for obfuscated perl contests - if ( $in_quote == 1 ) { - write_logfile_entry( - "Note: alphanumeric quote delimiter ($beginning_tok) \n"); - } - - while ( $i < $max_token_index ) { - - if ( $quote_pos == 0 || ( $i < 0 ) ) { - $tok = $$rtokens[ ++$i ]; - - if ( $tok eq '\\' ) { - - # retain backslash unless it hides the end token - $quoted_string .= $tok - unless $$rtokens[ $i + 1 ] eq $end_tok; - $quote_pos++; - last if ( $i >= $max_token_index ); - $tok = $$rtokens[ ++$i ]; - } - } - my $old_pos = $quote_pos; - - unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) - { - - } - $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); - - if ( $quote_pos > 0 ) { - - $quoted_string .= - substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); - - $quote_depth--; - - if ( $quote_depth == 0 ) { - $in_quote--; - last; - } - } - else { - $quoted_string .= substr( $tok, $old_pos ); - } - } - } - - ######################################################################## - # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. - ######################################################################## - else { - - while ( $i < $max_token_index ) { - $tok = $$rtokens[ ++$i ]; - - if ( $tok eq $end_tok ) { - $quote_depth--; - - if ( $quote_depth == 0 ) { - $in_quote--; - last; - } - } - elsif ( $tok eq $beginning_tok ) { - $quote_depth++; - } - elsif ( $tok eq '\\' ) { - - # retain backslash unless it hides the beginning or end token - $tok = $$rtokens[ ++$i ]; - $quoted_string .= '\\' - unless ( $tok eq $end_tok || $tok eq $beginning_tok ); - } - $quoted_string .= $tok; - } - } - if ( $i > $max_token_index ) { $i = $max_token_index } - return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, - $quoted_string ); -} - -sub indicate_error { - my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; - interrupt_logfile(); - warning($msg); - write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); - resume_logfile(); -} - -sub write_error_indicator_pair { - my ( $line_number, $input_line, $pos, $carrat ) = @_; - my ( $offset, $numbered_line, $underline ) = - make_numbered_line( $line_number, $input_line, $pos ); - $underline = write_on_underline( $underline, $pos - $offset, $carrat ); - warning( $numbered_line . "\n" ); - $underline =~ s/\s*$//; - warning( $underline . "\n" ); -} - -sub make_numbered_line { - - # Given an input line, its line number, and a character position of - # interest, create a string not longer than 80 characters of the form - # $lineno: sub_string - # such that the sub_string of $str contains the position of interest - # - # Here is an example of what we want, in this case we add trailing - # '...' because the line is long. - # - # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... - # - # Here is another example, this time in which we used leading '...' - # because of excessive length: - # - # 2: ... er of the World Wide Web Consortium's - # - # input parameters are: - # $lineno = line number - # $str = the text of the line - # $pos = position of interest (the error) : 0 = first character - # - # We return : - # - $offset = an offset which corrects the position in case we only - # display part of a line, such that $pos-$offset is the effective - # position from the start of the displayed line. - # - $numbered_line = the numbered line as above, - # - $underline = a blank 'underline' which is all spaces with the same - # number of characters as the numbered line. - - my ( $lineno, $str, $pos ) = @_; - my $offset = ( $pos < 60 ) ? 0 : $pos - 40; - my $excess = length($str) - $offset - 68; - my $numc = ( $excess > 0 ) ? 68 : undef; - - if ( defined($numc) ) { - if ( $offset == 0 ) { - $str = substr( $str, $offset, $numc - 4 ) . " ..."; - } - else { - $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; - } - } - else { - - if ( $offset == 0 ) { - } - else { - $str = "... " . substr( $str, $offset + 4 ); - } - } - - my $numbered_line = sprintf( "%d: ", $lineno ); - $offset -= length($numbered_line); - $numbered_line .= $str; - my $underline = " " x length($numbered_line); - return ( $offset, $numbered_line, $underline ); -} - -sub write_on_underline { - - # The "underline" is a string that shows where an error is; it starts - # out as a string of blanks with the same length as the numbered line of - # code above it, and we have to add marking to show where an error is. - # In the example below, we want to write the string '--^' just below - # the line of bad code: - # - # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... - # ---^ - # We are given the current underline string, plus a position and a - # string to write on it. - # - # In the above example, there will be 2 calls to do this: - # First call: $pos=19, pos_chr=^ - # Second call: $pos=16, pos_chr=--- - # - # This is a trivial thing to do with substr, but there is some - # checking to do. - - my ( $underline, $pos, $pos_chr ) = @_; - - # check for error..shouldn't happen - unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { - return $underline; - } - my $excess = length($pos_chr) + $pos - length($underline); - if ( $excess > 0 ) { - $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); - } - substr( $underline, $pos, length($pos_chr) ) = $pos_chr; - return ($underline); -} - -sub pre_tokenize { - - # Break a string, $str, into a sequence of preliminary tokens. We - # are interested in these types of tokens: - # words (type='w'), example: 'max_tokens_wanted' - # digits (type = 'd'), example: '0755' - # whitespace (type = 'b'), example: ' ' - # any other single character (i.e. punct; type = the character itself). - # We cannot do better than this yet because we might be in a quoted - # string or pattern. Caller sets $max_tokens_wanted to 0 to get all - # tokens. - my ( $str, $max_tokens_wanted ) = @_; - - # we return references to these 3 arrays: - my @tokens = (); # array of the tokens themselves - my @token_map = (0); # string position of start of each token - my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct - - do { - - # whitespace - if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } - - # numbers - # note that this must come before words! - elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } - - # words - elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } - - # single-character punctuation - elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } - - # that's all.. - else { - return ( \@tokens, \@token_map, \@type ); - } - - push @tokens, $1; - push @token_map, pos($str); - - } while ( --$max_tokens_wanted != 0 ); - - return ( \@tokens, \@token_map, \@type ); -} - -sub show_tokens { - - # this is an old debug routine - my ( $rtokens, $rtoken_map ) = @_; - my $num = scalar(@$rtokens); - my $i; - - for ( $i = 0 ; $i < $num ; $i++ ) { - my $len = length( $$rtokens[$i] ); - print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; - } -} - -sub matching_end_token { - - # find closing character for a pattern - my $beginning_token = shift; - - if ( $beginning_token eq '{' ) { - '}'; - } - elsif ( $beginning_token eq '[' ) { - ']'; - } - elsif ( $beginning_token eq '<' ) { - '>'; - } - elsif ( $beginning_token eq '(' ) { - ')'; - } - else { - $beginning_token; - } -} - -sub dump_token_types { - my $class = shift; - my $fh = shift; - - # This should be the latest list of token types in use - # adding NEW_TOKENS: add a comment here - print $fh <<'END_OF_LIST'; - -Here is a list of the token types currently used for lines of type 'CODE'. -For the following tokens, the "type" of a token is just the token itself. - -.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> -( ) <= >= == =~ !~ != ++ -- /= x= -... **= <<= >>= &&= ||= //= <=> -, + - / * | % ! x ~ = \ ? : . < > ^ & - -The following additional token types are defined: - - type meaning - b blank (white space) - { indent: opening structural curly brace or square bracket or paren - (code block, anonymous hash reference, or anonymous array reference) - } outdent: right structural curly brace or square bracket or paren - [ left non-structural square bracket (enclosing an array index) - ] right non-structural square bracket - ( left non-structural paren (all but a list right of an =) - ) right non-structural parena - L left non-structural curly brace (enclosing a key) - R right non-structural curly brace - ; terminal semicolon - f indicates a semicolon in a "for" statement - h here_doc operator << - # a comment - Q indicates a quote or pattern - q indicates a qw quote block - k a perl keyword - C user-defined constant or constant function (with void prototype = ()) - U user-defined function taking parameters - G user-defined function taking block parameter (like grep/map/eval) - M (unused, but reserved for subroutine definition name) - P (unused, but -html uses it to label pod text) - t type indicater such as %,$,@,*,&,sub - w bare word (perhaps a subroutine call) - i identifier of some type (with leading %, $, @, *, &, sub, -> ) - n a number - v a v-string - F a file test operator (like -e) - Y File handle - Z identifier in indirect object slot: may be file handle, object - J LABEL: code block label - j LABEL after next, last, redo, goto - p unary + - m unary - - pp pre-increment operator ++ - mm pre-decrement operator -- - A : used as attribute separator - - Here are the '_line_type' codes used internally: - SYSTEM - system-specific code before hash-bang line - CODE - line of perl code (including comments) - POD_START - line starting pod, such as '=head' - POD - pod documentation text - POD_END - last line of pod section, '=cut' - HERE - text of here-document - HERE_END - last line of here-doc (target word) - FORMAT - format section - FORMAT_END - last line of format section, '.' - DATA_START - __DATA__ line - DATA - unidentified text following __DATA__ - END_START - __END__ line - END - unidentified text following __END__ - ERROR - we are in big trouble, probably not a perl script -END_OF_LIST -} - -BEGIN { - - # These names are used in error messages - @opening_brace_names = qw# '{' '[' '(' '?' #; - @closing_brace_names = qw# '}' ']' ')' ':' #; - - my @digraphs = qw( - .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> - <= >= == =~ !~ != ++ -- /= x= ~~ - ); - @is_digraph{@digraphs} = (1) x scalar(@digraphs); - - my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ ); - @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); - - # make a hash of all valid token types for self-checking the tokenizer - # (adding NEW_TOKENS : select a new character and add to this list) - my @valid_token_types = qw# - A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v - { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & - #; - push( @valid_token_types, @digraphs ); - push( @valid_token_types, @trigraphs ); - push( @valid_token_types, '#' ); - push( @valid_token_types, ',' ); - @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); - - # a list of file test letters, as in -e (Table 3-4 of 'camel 3') - my @file_test_operators = - qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); - @is_file_test_operator{@file_test_operators} = - (1) x scalar(@file_test_operators); - - # these functions have prototypes of the form (&), so when they are - # followed by a block, that block MAY BE followed by an operator. - @_ = qw( do eval ); - @is_block_operator{@_} = (1) x scalar(@_); - - # these functions allow an identifier in the indirect object slot - @_ = qw( print printf sort exec system say); - @is_indirect_object_taker{@_} = (1) x scalar(@_); - - # These tokens may precede a code block - # patched for SWITCH/CASE - @_ = - qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else - unless do while until eval for foreach map grep sort - switch case given when); - @is_code_block_token{@_} = (1) x scalar(@_); - - # I'll build the list of keywords incrementally - my @Keywords = (); - - # keywords and tokens after which a value or pattern is expected, - # but not an operator. In other words, these should consume terms - # to their right, or at least they are not expected to be followed - # immediately by operators. - my @value_requestor = qw( - AUTOLOAD - BEGIN - CHECK - DESTROY - END - EQ - GE - GT - INIT - LE - LT - NE - UNITCHECK - abs - accept - alarm - and - atan2 - bind - binmode - bless - break - caller - chdir - chmod - chomp - chop - chown - chr - chroot - close - closedir - cmp - connect - continue - cos - crypt - dbmclose - dbmopen - defined - delete - die - dump - each - else - elsif - eof - eq - exec - exists - exit - exp - fcntl - fileno - flock - for - foreach - formline - ge - getc - getgrgid - getgrnam - gethostbyaddr - gethostbyname - getnetbyaddr - getnetbyname - getpeername - getpgrp - getpriority - getprotobyname - getprotobynumber - getpwnam - getpwuid - getservbyname - getservbyport - getsockname - getsockopt - glob - gmtime - goto - grep - gt - hex - if - index - int - ioctl - join - keys - kill - last - lc - lcfirst - le - length - link - listen - local - localtime - lock - log - lstat - lt - map - mkdir - msgctl - msgget - msgrcv - msgsnd - my - ne - next - no - not - oct - open - opendir - or - ord - our - pack - pipe - pop - pos - print - printf - prototype - push - quotemeta - rand - read - readdir - readlink - readline - readpipe - recv - redo - ref - rename - require - reset - return - reverse - rewinddir - rindex - rmdir - scalar - seek - seekdir - select - semctl - semget - semop - send - sethostent - setnetent - setpgrp - setpriority - setprotoent - setservent - setsockopt - shift - shmctl - shmget - shmread - shmwrite - shutdown - sin - sleep - socket - socketpair - sort - splice - split - sprintf - sqrt - srand - stat - study - substr - symlink - syscall - sysopen - sysread - sysseek - system - syswrite - tell - telldir - tie - tied - truncate - uc - ucfirst - umask - undef - unless - unlink - unpack - unshift - untie - until - use - utime - values - vec - waitpid - warn - while - write - xor - - switch - case - given - when - err - say - ); - - # patched above for SWITCH/CASE given/when err say - # 'err' is a fairly safe addition. - # TODO: 'default' still needed if appropriate - # 'use feature' seen, but perltidy works ok without it. - # Concerned that 'default' could break code. - push( @Keywords, @value_requestor ); - - # These are treated the same but are not keywords: - my @extra_vr = qw( - constant - vars - ); - push( @value_requestor, @extra_vr ); - - @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); - - # this list contains keywords which do not look for arguments, - # so that they might be followed by an operator, or at least - # not a term. - my @operator_requestor = qw( - endgrent - endhostent - endnetent - endprotoent - endpwent - endservent - fork - getgrent - gethostent - getlogin - getnetent - getppid - getprotoent - getpwent - getservent - setgrent - setpwent - time - times - wait - wantarray - ); - - push( @Keywords, @operator_requestor ); - - # These are treated the same but are not considered keywords: - my @extra_or = qw( - STDERR - STDIN - STDOUT - ); - - push( @operator_requestor, @extra_or ); - - @expecting_operator_token{@operator_requestor} = - (1) x scalar(@operator_requestor); - - # these token TYPES expect trailing operator but not a term - # note: ++ and -- are post-increment and decrement, 'C' = constant - my @operator_requestor_types = qw( ++ -- C <> q ); - @expecting_operator_types{@operator_requestor_types} = - (1) x scalar(@operator_requestor_types); - - # these token TYPES consume values (terms) - # note: pp and mm are pre-increment and decrement - # f=semicolon in for, F=file test operator - my @value_requestor_type = qw# - L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x - **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= - <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ - f F pp mm Y p m U J G j >> << ^ t - #; - push( @value_requestor_type, ',' ) - ; # (perl doesn't like a ',' in a qw block) - @expecting_term_types{@value_requestor_type} = - (1) x scalar(@value_requestor_type); - - # Note: the following valid token types are not assigned here to - # hashes requesting to be followed by values or terms, but are - # instead currently hard-coded into sub operator_expected: - # ) -> :: Q R Z ] b h i k n v w } # - - # For simple syntax checking, it is nice to have a list of operators which - # will really be unhappy if not followed by a term. This includes most - # of the above... - %really_want_term = %expecting_term_types; - - # with these exceptions... - delete $really_want_term{'U'}; # user sub, depends on prototype - delete $really_want_term{'F'}; # file test works on $_ if no following term - delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; - # let perl do it - - @_ = qw(q qq qw qx qr s y tr m); - @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_); - - # These keywords are handled specially in the tokenizer code: - my @special_keywords = qw( - do - eval - format - m - package - q - qq - qr - qw - qx - s - sub - tr - y - ); - push( @Keywords, @special_keywords ); - - # Keywords after which list formatting may be used - # WARNING: do not include |map|grep|eval or perl may die on - # syntax errors (map1.t). - my @keyword_taking_list = qw( - and - chmod - chomp - chop - chown - dbmopen - die - elsif - exec - fcntl - for - foreach - formline - getsockopt - if - index - ioctl - join - kill - local - msgctl - msgrcv - msgsnd - my - open - or - our - pack - print - printf - push - read - readpipe - recv - return - reverse - rindex - seek - select - semctl - semget - send - setpriority - setsockopt - shmctl - shmget - shmread - shmwrite - socket - socketpair - sort - splice - split - sprintf - substr - syscall - sysopen - sysread - sysseek - system - syswrite - tie - unless - unlink - unpack - unshift - until - vec - warn - while - ); - @is_keyword_taking_list{@keyword_taking_list} = - (1) x scalar(@keyword_taking_list); - - # These are not used in any way yet - # my @unused_keywords = qw( - # CORE - # __FILE__ - # __LINE__ - # __PACKAGE__ - # ); - - # The list of keywords was extracted from function 'keyword' in - # perl file toke.c version 5.005.03, using this utility, plus a - # little editing: (file getkwd.pl): - # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } - # Add 'get' prefix where necessary, then split into the above lists. - # This list should be updated as necessary. - # The list should not contain these special variables: - # ARGV DATA ENV SIG STDERR STDIN STDOUT - # __DATA__ __END__ - - @is_keyword{@Keywords} = (1) x scalar(@Keywords); -} -1; -__END__ - -=head1 NAME - -Perl::Tidy - Parses and beautifies perl source - -=head1 SYNOPSIS - - use Perl::Tidy; - - Perl::Tidy::perltidy( - source => $source, - destination => $destination, - stderr => $stderr, - argv => $argv, - perltidyrc => $perltidyrc, - logfile => $logfile, - errorfile => $errorfile, - formatter => $formatter, # callback object (see below) - dump_options => $dump_options, - dump_options_type => $dump_options_type, - ); - -=head1 DESCRIPTION - -This module makes the functionality of the perltidy utility available to perl -scripts. Any or all of the input parameters may be omitted, in which case the -@ARGV array will be used to provide input parameters as described -in the perltidy(1) man page. - -For example, the perltidy script is basically just this: - - use Perl::Tidy; - Perl::Tidy::perltidy(); - -The module accepts input and output streams by a variety of methods. -The following list of parameters may be any of a the following: a -filename, an ARRAY reference, a SCALAR reference, or an object with -either a B or B method, as appropriate. - - source - the source of the script to be formatted - destination - the destination of the formatted output - stderr - standard error output - perltidyrc - the .perltidyrc file - logfile - the .LOG file stream, if any - errorfile - the .ERR file stream, if any - dump_options - ref to a hash to receive parameters (see below), - dump_options_type - controls contents of dump_options - dump_getopt_flags - ref to a hash to receive Getopt flags - dump_options_category - ref to a hash giving category of options - dump_abbreviations - ref to a hash giving all abbreviations - -The following chart illustrates the logic used to decide how to -treat a parameter. - - ref($param) $param is assumed to be: - ----------- --------------------- - undef a filename - SCALAR ref to string - ARRAY ref to array - (other) object with getline (if source) or print method - -If the parameter is an object, and the object has a B method, that -close method will be called at the end of the stream. - -=over 4 - -=item source - -If the B parameter is given, it defines the source of the -input stream. - -=item destination - -If the B parameter is given, it will be used to define the -file or memory location to receive output of perltidy. - -=item stderr - -The B parameter allows the calling program to capture the output -to what would otherwise go to the standard error output device. - -=item perltidyrc - -If the B file is given, it will be used instead of any -F<.perltidyrc> configuration file that would otherwise be used. - -=item argv - -If the B parameter is given, it will be used instead of the -B<@ARGV> array. The B parameter may be a string, a reference to a -string, or a reference to an array. If it is a string or reference to a -string, it will be parsed into an array of items just as if it were a -command line string. - -=item dump_options - -If the B parameter is given, it must be the reference to a hash. -In this case, the parameters contained in any perltidyrc configuration file -will be placed in this hash and perltidy will return immediately. This is -equivalent to running perltidy with --dump-options, except that the perameters -are returned in a hash rather than dumped to standard output. Also, by default -only the parameters in the perltidyrc file are returned, but this can be -changed (see the next parameter). This parameter provides a convenient method -for external programs to read a perltidyrc file. An example program using -this feature, F, is included in the distribution. - -Any combination of the B parameters may be used together. - -=item dump_options_type - -This parameter is a string which can be used to control the parameters placed -in the hash reference supplied by B. The possible values are -'perltidyrc' (default) and 'full'. The 'full' parameter causes both the -default options plus any options found in a perltidyrc file to be returned. - -=item dump_getopt_flags - -If the B parameter is given, it must be the reference to a -hash. This hash will receive all of the parameters that perltidy understands -and flags that are passed to Getopt::Long. This parameter may be -used alone or with the B flag. Perltidy will -exit immediately after filling this hash. See the demo program -F for example usage. - -=item dump_options_category - -If the B parameter is given, it must be the reference to a -hash. This hash will receive a hash with keys equal to all long parameter names -and values equal to the title of the corresponding section of the perltidy manual. -See the demo program F for example usage. - -=item dump_abbreviations - -If the B parameter is given, it must be the reference to a -hash. This hash will receive all abbreviations used by Perl::Tidy. See the -demo program F for example usage. - -=back - -=head1 EXAMPLE - -The following example passes perltidy a snippet as a reference -to a string and receives the result back in a reference to -an array. - - use Perl::Tidy; - - # some messy source code to format - my $source = <<'EOM'; - use strict; - my @editors=('Emacs', 'Vi '); my $rand = rand(); - print "A poll of 10 random programmers gave these results:\n"; - foreach(0..10) { - my $i=int ($rand+rand()); - print " $editors[$i] users are from Venus" . ", " . - "$editors[1-$i] users are from Mars" . - "\n"; - } - EOM - - # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY - my @dest; - perltidy( source => \$source, destination => \@dest ); - foreach (@dest) {print} - -=head1 Using the B Callback Object - -The B parameter is an optional callback object which allows -the calling program to receive tokenized lines directly from perltidy for -further specialized processing. When this parameter is used, the two -formatting options which are built into perltidy (beautification or -html) are ignored. The following diagram illustrates the logical flow: - - |-- (normal route) -> code beautification - caller->perltidy->|-- (-html flag ) -> create html - |-- (formatter given)-> callback to write_line - -This can be useful for processing perl scripts in some way. The -parameter C<$formatter> in the perltidy call, - - formatter => $formatter, - -is an object created by the caller with a C method which -will accept and process tokenized lines, one line per call. Here is -a simple example of a C which merely prints the line number, -the line type (as determined by perltidy), and the text of the line: - - sub write_line { - - # This is called from perltidy line-by-line - my $self = shift; - my $line_of_tokens = shift; - my $line_type = $line_of_tokens->{_line_type}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $input_line = $line_of_tokens->{_line_text}; - print "$input_line_number:$line_type:$input_line"; - } - -The complete program, B, is contained in the examples section of -the source distribution. As this example shows, the callback method -receives a parameter B<$line_of_tokens>, which is a reference to a hash -of other useful information. This example uses these hash entries: - - $line_of_tokens->{_line_number} - the line number (1,2,...) - $line_of_tokens->{_line_text} - the text of the line - $line_of_tokens->{_line_type} - the type of the line, one of: - - SYSTEM - system-specific code before hash-bang line - CODE - line of perl code (including comments) - POD_START - line starting pod, such as '=head' - POD - pod documentation text - POD_END - last line of pod section, '=cut' - HERE - text of here-document - HERE_END - last line of here-doc (target word) - FORMAT - format section - FORMAT_END - last line of format section, '.' - DATA_START - __DATA__ line - DATA - unidentified text following __DATA__ - END_START - __END__ line - END - unidentified text following __END__ - ERROR - we are in big trouble, probably not a perl script - -Most applications will be only interested in lines of type B. For -another example, let's write a program which checks for one of the -so-called I C<&`>, C<$&>, and C<$'>, which -can slow down processing. Here is a B, from the example -program B, which does that: - - sub write_line { - - # This is called back from perltidy line-by-line - # We're looking for $`, $&, and $' - my ( $self, $line_of_tokens ) = @_; - - # pull out some stuff we might need - my $line_type = $line_of_tokens->{_line_type}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $input_line = $line_of_tokens->{_line_text}; - my $rtoken_type = $line_of_tokens->{_rtoken_type}; - my $rtokens = $line_of_tokens->{_rtokens}; - chomp $input_line; - - # skip comments, pod, etc - return if ( $line_type ne 'CODE' ); - - # loop over tokens looking for $`, $&, and $' - for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { - - # we only want to examine token types 'i' (identifier) - next unless $$rtoken_type[$j] eq 'i'; - - # pull out the actual token text - my $token = $$rtokens[$j]; - - # and check it - if ( $token =~ /^\$[\`\&\']$/ ) { - print STDERR - "$input_line_number: $token\n"; - } - } - } - -This example pulls out these tokenization variables from the $line_of_tokens -hash reference: - - $rtoken_type = $line_of_tokens->{_rtoken_type}; - $rtokens = $line_of_tokens->{_rtokens}; - -The variable C<$rtoken_type> is a reference to an array of token type codes, -and C<$rtokens> is a reference to a corresponding array of token text. -These are obviously only defined for lines of type B. -Perltidy classifies tokens into types, and has a brief code for each type. -You can get a complete list at any time by running perltidy from the -command line with - - perltidy --dump-token-types - -In the present example, we are only looking for tokens of type B -(identifiers), so the for loop skips past all other types. When an -identifier is found, its actual text is checked to see if it is one -being sought. If so, the above write_line prints the token and its -line number. - -The B feature is relatively new in perltidy, and further -documentation needs to be written to complete its description. However, -several example programs have been written and can be found in the -B section of the source distribution. Probably the best way -to get started is to find one of the examples which most closely matches -your application and start modifying it. - -For help with perltidy's pecular way of breaking lines into tokens, you -might run, from the command line, - - perltidy -D filename - -where F is a short script of interest. This will produce -F with interleaved lines of text and their token types. -The B<-D> flag has been in perltidy from the beginning for this purpose. -If you want to see the code which creates this file, it is -C in Tidy.pm. - -=head1 EXPORT - - &perltidy - -=head1 CREDITS - -Thanks to Hugh Myers who developed the initial modular interface -to perltidy. - -=head1 VERSION - -This man page documents Perl::Tidy version 20071205. - -=head1 AUTHOR - - Steve Hancock - perltidy at users.sourceforge.net - -=head1 SEE ALSO - -The perltidy(1) man page describes all of the features of perltidy. It -can be found at http://perltidy.sourceforge.net. - -=cut diff -r 61ae1529b238 -r 0f116c3002d7 indenters/perltidy --- a/indenters/perltidy Mon May 26 12:26:19 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2724 +0,0 @@ -#!/usr/bin/perl -package main; - -use lib "./"; -use lib "../indenters"; -use PerlTidyLib; - - -my $arg_string = undef; - -# give Macs a chance to provide command line parameters -if ($^O =~ /Mac/) { - $arg_string = - MacPerl::Ask( 'Please enter @ARGV (-h for help)', - defined $ARGV[0] ? "\"$ARGV[0]\"" : "" ); -} - -Perl::Tidy::perltidy(argv => $arg_string); - -__END__ - -=head1 NAME - -perltidy - a perl script indenter and reformatter - -=head1 SYNOPSIS - - perltidy [ options ] file1 file2 file3 ... - (output goes to file1.tdy, file2.tdy, file3.tdy, ...) - perltidy [ options ] file1 -o outfile - perltidy [ options ] file1 -st >outfile - perltidy [ options ] outfile - -=head1 DESCRIPTION - -Perltidy reads a perl script and writes an indented, reformatted script. - -Many users will find enough information in L<"EXAMPLES"> to get -started. New users may benefit from the short tutorial -which can be found at -http://perltidy.sourceforge.net/tutorial.html - -A convenient aid to systematically defining a set of style parameters -can be found at -http://perltidy.sourceforge.net/stylekey.html - -Perltidy can produce output on either of two modes, depending on the -existence of an B<-html> flag. Without this flag, the output is passed -through a formatter. The default formatting tries to follow the -recommendations in perlstyle(1), but it can be controlled in detail with -numerous input parameters, which are described in L<"FORMATTING -OPTIONS">. - -When the B<-html> flag is given, the output is passed through an HTML -formatter which is described in L<"HTML OPTIONS">. - -=head1 EXAMPLES - - perltidy somefile.pl - -This will produce a file F containing the script reformatted -using the default options, which approximate the style suggested in -perlstyle(1). Perltidy never changes the input file. - - perltidy *.pl - -Execute perltidy on all F<.pl> files in the current directory with the -default options. The output will be in files with an appended F<.tdy> -extension. For any file with an error, there will be a file with extension -F<.ERR>. - - perltidy -b file1.pl file2.pl - -Modify F and F in place, and backup the originals to -F and F. If F and/or F -already exist, they will be overwritten. - - perltidy -gnu somefile.pl - -Execute perltidy on file F with a style which approximates the -GNU Coding Standards for C programs. The output will be F. - - perltidy -i=3 somefile.pl - -Execute perltidy on file F, with 3 columns for each level of -indentation (B<-i=3>) instead of the default 4 columns. There will not be any -tabs in the reformatted script, except for any which already exist in comments, -pod documents, quotes, and here documents. Output will be F. - - perltidy -i=3 -et=8 somefile.pl - -Same as the previous example, except that leading whitespace will -be entabbed with one tab character per 8 spaces. - - perltidy -ce -l=72 somefile.pl - -Execute perltidy on file F with all defaults except use "cuddled -elses" (B<-ce>) and a maximum line length of 72 columns (B<-l=72>) instead of -the default 80 columns. - - perltidy -g somefile.pl - -Execute perltidy on file F and save a log file F -which shows the nesting of braces, parentheses, and square brackets at -the start of every line. - - perltidy -html somefile.pl - -This will produce a file F containing the script with -html markup. The output file will contain an embedded style sheet in -the section which may be edited to change the appearance. - - perltidy -html -css=mystyle.css somefile.pl - -This will produce a file F containing the script with -html markup. This output file will contain a link to a separate style -sheet file F. If the file F does not exist, -it will be created. If it exists, it will not be overwritten. - - perltidy -html -pre somefile.pl - -Write an html snippet with only the PRE section to F. -This is useful when code snippets are being formatted for inclusion in a -larger web page. No style sheet will be written in this case. - - perltidy -html -ss >mystyle.css - -Write a style sheet to F and exit. - - perltidy -html -frm mymodule.pm - -Write html with a frame holding a table of contents and the source code. The -output files will be F (the frame), F -(the table of contents), and F (the source code). - -=head1 OPTIONS - OVERVIEW - -The entire command line is scanned for options, and they are processed -before any files are processed. As a result, it does not matter -whether flags are before or after any filenames. However, the relative -order of parameters is important, with later parameters overriding the -values of earlier parameters. - -For each parameter, there is a long name and a short name. The short -names are convenient for keyboard input, while the long names are -self-documenting and therefore useful in scripts. It is customary to -use two leading dashes for long names, but one may be used. - -Most parameters which serve as on/off flags can be negated with a -leading "n" (for the short name) or a leading "no" or "no-" (for the -long name). For example, the flag to outdent long quotes is is B<-olq> -or B<--outdent-long-quotes>. The flag to skip this is B<-nolq> -or B<--nooutdent-long-quotes> or B<--no-outdent-long-quotes>. - -Options may not be bundled together. In other words, options B<-q> and -B<-g> may NOT be entered as B<-qg>. - -Option names may be terminated early as long as they are uniquely identified. -For example, instead of B<--dump-token-types>, it would be sufficient to enter -B<--dump-tok>, or even B<--dump-t>, to uniquely identify this command. - -=head2 I/O control - -The following parameters concern the files which are read and written. - -=over 4 - -=item B<-h>, B<--help> - -Show summary of usage and exit. - -=item B<-o>=filename, B<--outfile>=filename - -Name of the output file (only if a single input file is being -processed). If no output file is specified, and output is not -redirected to the standard output, the output will go to F. - -=item B<-st>, B<--standard-output> - -Perltidy must be able to operate on an arbitrarily large number of files -in a single run, with each output being directed to a different output -file. Obviously this would conflict with outputting to the single -standard output device, so a special flag, B<-st>, is required to -request outputting to the standard output. For example, - - perltidy somefile.pl -st >somefile.new.pl - -This option may only be used if there is just a single input file. -The default is B<-nst> or B<--nostandard-output>. - -=item B<-se>, B<--standard-error-output> - -If perltidy detects an error when processing file F, its -default behavior is to write error messages to file F. -Use B<-se> to cause all error messages to be sent to the standard error -output stream instead. This directive may be negated with B<-nse>. -Thus, you may place B<-se> in a F<.perltidyrc> and override it when -desired with B<-nse> on the command line. - -=item B<-oext>=ext, B<--output-file-extension>=ext - -Change the extension of the output file to be F instead of the -default F (or F in case the -B<-html> option is used). -See L. - -=item B<-opath>=path, B<--output-path>=path - -When perltidy creates a filename for an output file, by default it merely -appends an extension to the path and basename of the input file. This -parameter causes the path to be changed to F instead. - -The path should end in a valid path separator character, but perltidy will try -to add one if it is missing. - -For example - - perltidy somefile.pl -opath=/tmp/ - -will produce F. Otherwise, F will -appear in whatever directory contains F. - -If the path contains spaces, it should be placed in quotes. - -This parameter will be ignored if output is being directed to standard output, -or if it is being specified explicitly with the B<-o=s> parameter. - -=item B<-b>, B<--backup-and-modify-in-place> - -Modify the input file or files in-place and save the original with the -extension F<.bak>. Any existing F<.bak> file will be deleted. See next item -for changing the default backup extension. - -A B<-b> flag will be ignored if input is from standard input, or -if the B<-html> flag is set. - -=item B<-bext>=ext, B<--backup-file-extension>=ext - -Change the extension of the backup file to be something other than the -default F<.bak>. See L. - -=item B<-w>, B<--warning-output> - -Setting B<-w> causes any non-critical warning -messages to be reported as errors. These include messages -about possible pod problems, possibly bad starting indentation level, -and cautions about indirect object usage. The default, B<-nw> or -B<--nowarning-output>, is not to include these warnings. - -=item B<-q>, B<--quiet> - -Deactivate error messages and syntax checking (for running under -an editor). - -For example, if you use a vi-style editor, such as vim, you may execute -perltidy as a filter from within the editor using something like - - :n1,n2!perltidy -q - -where C represents the selected text. Without the B<-q> flag, -any error message may mess up your screen, so be prepared to use your -"undo" key. - -=item B<-log>, B<--logfile> - -Save the F<.LOG> file, which has many useful diagnostics. Perltidy always -creates a F<.LOG> file, but by default it is deleted unless a program bug is -suspected. Setting the B<-log> flag forces the log file to be saved. - -=item B<-g=n>, B<--logfile-gap=n> - -Set maximum interval between input code lines in the logfile. This purpose of -this flag is to assist in debugging nesting errors. The value of C is -optional. If you set the flag B<-g> without the value of C, it will be -taken to be 1, meaning that every line will be written to the log file. This -can be helpful if you are looking for a brace, paren, or bracket nesting error. - -Setting B<-g> also causes the logfile to be saved, so it is not necessary to -also include B<-log>. - -If no B<-g> flag is given, a value of 50 will be used, meaning that at least -every 50th line will be recorded in the logfile. This helps prevent -excessively long log files. - -Setting a negative value of C is the same as not setting B<-g> at all. - -=item B<-npro> B<--noprofile> - -Ignore any F<.perltidyrc> command file. Normally, perltidy looks first in -your current directory for a F<.perltidyrc> file of parameters. (The format -is described below). If it finds one, it applies those options to the -initial default values, and then it applies any that have been defined -on the command line. If no F<.perltidyrc> file is found, it looks for one -in your home directory. - -If you set the B<-npro> flag, perltidy will not look for this file. - -=item B<-pro=filename> or B<--profile=filename> - -To simplify testing and switching .perltidyrc files, this command may be -used to specify a configuration file which will override the default -name of .perltidyrc. There must not be a space on either side of the -'=' sign. For example, the line - - perltidy -pro=testcfg - -would cause file F to be used instead of the -default F<.perltidyrc>. - -=item B<-opt>, B<--show-options> - -Write a list of all options used to the F<.LOG> file. -Please see B<--dump-options> for a simpler way to do this. - -=item B<-f>, B<--force-read-binary> - -Force perltidy to process binary files. To avoid producing excessive -error messages, perltidy skips files identified by the system as non-text. -However, valid perl scripts containing binary data may sometimes be identified -as non-text, and this flag forces perltidy to process them. - -=back - -=head1 FORMATTING OPTIONS - -=head2 Basic Options - -=over 4 - -=item B<-l=n>, B<--maximum-line-length=n> - -The default maximum line length is n=80 characters. Perltidy will try -to find line break points to keep lines below this length. However, long -quotes and side comments may cause lines to exceed this length. -Setting B<-l=0> is equivalent to setting B<-l=(a large number)>. - -=item B<-i=n>, B<--indent-columns=n> - -Use n columns per indentation level (default n=4). - -=item tabs - -Using tab characters will almost certainly lead to future portability -and maintenance problems, so the default and recommendation is not to -use them. For those who prefer tabs, however, there are two different -options. - -Except for possibly introducing tab indentation characters, as outlined -below, perltidy does not introduce any tab characters into your file, -and it removes any tabs from the code (unless requested not to do so -with B<-fws>). If you have any tabs in your comments, quotes, or -here-documents, they will remain. - -=over 4 - -=item B<-et=n>, B<--entab-leading-whitespace> - -This flag causes each B initial space characters to be replaced by -one tab character. Note that the integer B is completely independent -of the integer specified for indentation parameter, B<-i=n>. - -=item B<-t>, B<--tabs> - -This flag causes one leading tab character to be inserted for each level -of indentation. Certain other features are incompatible with this -option, and if these options are also given, then a warning message will -be issued and this flag will be unset. One example is the B<-lp> -option. - -=back - -=item B<-syn>, B<--check-syntax> - -This flag causes perltidy to run C to check syntax of input -and output. (To change the flags passed to perl, see the next -item, B<-pscf>). The results are written to the F<.LOG> file, which -will be saved if an error is detected in the output script. The output -script is not checked if the input script has a syntax error. Perltidy -does its own checking, but this option employs perl to get a "second -opinion". - -If perl reports errors in the input file, they will not be reported in -the error output unless the B<--warning-output> flag is given. - -The default is B to do this type of syntax checking (although -perltidy will still do as much self-checking as possible). The reason -is that it causes all code in BEGIN blocks to be executed, for all -modules being used, and this opens the door to security issues and -infinite loops when running perltidy. - -=item B<-pscf=s>, B<-perl-syntax-check-flags=s> - -When perl is invoked to check syntax, the normal flags are C<-c -T>. In -addition, if the B<-x> flag is given to perltidy, then perl will also be -passed a B<-x> flag. It should not normally be necessary to change -these flags, but it can be done with the B<-pscf=s> flag. For example, -if the taint flag, C<-T>, is not wanted, the flag could be set to be just -B<-pscf=-c>. - -Perltidy will pass your string to perl with the exception that it will -add a B<-c> and B<-x> if appropriate. The F<.LOG> file will show -exactly what flags were passed to perl. - -=item B<-io>, B<--indent-only> - -This flag is used to deactivate all formatting and line break changes. -When it is in effect, the only change to the script will be indentation. -And any flags controlling whitespace and newlines will be ignored. You -might want to use this if you are perfectly happy with your whitespace -and line breaks, and merely want perltidy to handle the indentation. -(This also speeds up perltidy by well over a factor of two, so it might be -useful when perltidy is merely being used to help find a brace error in -a large script). - -Setting this flag is equivalent to setting B<--freeze-newlines> and -B<--freeze-whitespace>. - -=item B<-ole=s>, B<--output-line-ending=s> - -where s=C, C, C, or C. This flag tells perltidy -to output line endings for a specific system. Normally, -perltidy writes files with the line separator character of the host -system. The C and C flags have an identical result. - -=item B<-ple>, B<--preserve-line-endings> - -This flag tells perltidy to write its output files with the same line -endings as the input file, if possible. It should work for -B, B, and B line endings. It will only work if perltidy -input comes from a filename (rather than stdin, for example). If -perltidy has trouble determining the input file line ending, it will -revert to the default behavior of using the line ending of the host system. - -=back - -=head2 Code Indentation Control - -=over 4 - -=item B<-ci=n>, B<--continuation-indentation=n> - -Continuation indentation is extra indentation spaces applied when -a long line is broken. The default is n=2, illustrated here: - - my $level = # -ci=2 - ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; - -The same example, with n=0, is a little harder to read: - - my $level = # -ci=0 - ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; - -The value given to B<-ci> is also used by some commands when a small -space is required. Examples are commands for outdenting labels, -B<-ola>, and control keywords, B<-okw>. - -When default values are not used, it is suggested that the value B -given with B<-ci=n> be no more than about one-half of the number of -spaces assigned to a full indentation level on the B<-i=n> command. - -=item B<-sil=n> B<--starting-indentation-level=n> - -By default, perltidy examines the input file and tries to determine the -starting indentation level. While it is often zero, it may not be -zero for a code snippet being sent from an editing session. If the -default method does not work correctly, or you want to change the -starting level, use B<-sil=n>, to force the starting level to be n. - -=item List indentation using B<-lp>, B<--line-up-parentheses> - -By default, perltidy indents lists with 4 spaces, or whatever value -is specified with B<-i=n>. Here is a small list formatted in this way: - - # perltidy (default) - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -Use the B<-lp> flag to add extra indentation to cause the data to begin -past the opening parentheses of a sub call or list, or opening square -bracket of an anonymous array, or opening curly brace of an anonymous -hash. With this option, the above list would become: - - # perltidy -lp - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -If the available line length (see B<-l=n> ) does not permit this much -space, perltidy will use less. For alternate placement of the -closing paren, see the next section. - -This option has no effect on code BLOCKS, such as if/then/else blocks, -which always use whatever is specified with B<-i=n>. Also, the -existence of line breaks and/or block comments between the opening and -closing parens may cause perltidy to temporarily revert to its default -method. - -Note: The B<-lp> option may not be used together with the B<-t> tabs option. -It may, however, be used with the B<-et=n> tab method. - -In addition, any parameter which significantly restricts the ability of -perltidy to choose newlines will conflict with B<-lp> and will cause -B<-lp> to be deactivated. These include B<-io>, B<-fnl>, B<-nanl>, and -B<-ndnl>. The reason is that the B<-lp> indentation style can require -the careful coordination of an arbitrary number of break points in -hierarchical lists, and these flags may prevent that. - -=item B<-cti=n>, B<--closing-token-indentation> - -The B<-cti=n> flag controls the indentation of a line beginning with -a C<)>, C<]>, or a non-block C<}>. Such a line receives: - - -cti = 0 no extra indentation (default) - -cti = 1 extra indentation such that the closing token - aligns with its opening token. - -cti = 2 one extra indentation level if the line looks like: - ); or ]; or }; - -cti = 3 one extra indentation level always - -The flags B<-cti=1> and B<-cti=2> work well with the B<-lp> flag (previous -section). - - # perltidy -lp -cti=1 - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - - # perltidy -lp -cti=2 - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -These flags are merely hints to the formatter and they may not always be -followed. In particular, if -lp is not being used, the indentation for -B is constrained to be no more than one indentation level. - -If desired, this control can be applied independently to each of the -closing container token types. In fact, B<-cti=n> is merely an -abbreviation for B<-cpi=n -csbi=n -cbi=n>, where: -B<-cpi> or B<--closing-paren-indentation> controls B<)>'s, -B<-csbi> or B<--closing-square-bracket-indentation> controls B<]>'s, -B<-cbi> or B<--closing-brace-indentation> controls non-block B<}>'s. - -=item B<-icp>, B<--indent-closing-paren> - -The B<-icp> flag is equivalent to -B<-cti=2>, described in the previous section. The B<-nicp> flag is -equivalent B<-cti=0>. They are included for backwards compatability. - -=item B<-icb>, B<--indent-closing-brace> - -The B<-icb> option gives one extra level of indentation to a brace which -terminates a code block . For example, - - if ($task) { - yyy(); - } # -icb - else { - zzz(); - } - -The default is not to do this, indicated by B<-nicb>. - -=item B<-olq>, B<--outdent-long-quotes> - -When B<-olq> is set, lines which is a quoted string longer than the -value B will have their indentation removed to make -them more readable. This is the default. To prevent such out-denting, -use B<-nolq> or B<--nooutdent-long-lines>. - -=item B<-oll>, B<--outdent-long-lines> - -This command is equivalent to B<--outdent-long-quotes> and -B<--outdent-long-comments>, and it is included for compatibility with previous -versions of perltidy. The negation of this also works, B<-noll> or -B<--nooutdent-long-lines>, and is equivalent to setting B<-nolq> and B<-nolc>. - -=item Outdenting Labels: B<-ola>, B<--outdent-labels> - -This command will cause labels to be outdented by 2 spaces (or whatever B<-ci> -has been set to), if possible. This is the default. For example: - - my $i; - LOOP: while ( $i = ) { - chomp($i); - next unless $i; - fixit($i); - } - -Use B<-nola> to not outdent labels. - -=item Outdenting Keywords - -=over 4 - -=item B<-okw>, B<--outdent-keywords> - -The command B<-okw> will will cause certain leading control keywords to -be outdented by 2 spaces (or whatever B<-ci> has been set to), if -possible. By default, these keywords are C, C, C, -C, and C. The intention is to make these control keywords -easier to see. To change this list of keywords being outdented, see -the next section. - -For example, using C on the previous example gives: - - my $i; - LOOP: while ( $i = ) { - chomp($i); - next unless $i; - fixit($i); - } - -The default is not to do this. - -=item Specifying Outdented Keywords: B<-okwl=string>, B<--outdent-keyword-list=string> - -This command can be used to change the keywords which are outdented with -the B<-okw> command. The parameter B is a required list of perl -keywords, which should be placed in quotes if there are more than one. -By itself, it does not cause any outdenting to occur, so the B<-okw> -command is still required. - -For example, the commands C<-okwl="next last redo goto" -okw> will cause -those four keywords to be outdented. It is probably simplest to place -any B<-okwl> command in a F<.perltidyrc> file. - -=back - -=back - -=head2 Whitespace Control - -Whitespace refers to the blank space between variables, operators, -and other code tokens. - -=over 4 - -=item B<-fws>, B<--freeze-whitespace> - -This flag causes your original whitespace to remain unchanged, and -causes the rest of the whitespace commands in this section, the -Code Indentation section, and -the Comment Control section to be ignored. - -=item Tightness of curly braces, parentheses, and square brackets. - -Here the term "tightness" will mean the closeness with which -pairs of enclosing tokens, such as parentheses, contain the quantities -within. A numerical value of 0, 1, or 2 defines the tightness, with -0 being least tight and 2 being most tight. Spaces within containers -are always symmetric, so if there is a space after a C<(> then there -will be a space before the corresponding C<)>. - -The B<-pt=n> or B<--paren-tightness=n> parameter controls the space within -parens. The example below shows the effect of the three possible -values, 0, 1, and 2: - - if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # -pt=0 - if ( ( my $len_tab = length($tabstr) ) > 0 ) { # -pt=1 (default) - if ((my $len_tab = length($tabstr)) > 0) { # -pt=2 - -When n is 0, there is always a space to the right of a '(' and to the left -of a ')'. For n=2 there is never a space. For n=1, the default, there -is a space unless the quantity within the parens is a single token, such -as an identifier or quoted string. - -Likewise, the parameter B<-sbt=n> or B<--square-bracket-tightness=n> -controls the space within square brackets, as illustrated below. - - $width = $col[ $j + $k ] - $col[ $j ]; # -sbt=0 - $width = $col[ $j + $k ] - $col[$j]; # -sbt=1 (default) - $width = $col[$j + $k] - $col[$j]; # -sbt=2 - -Curly braces which do not contain code blocks are controlled by -the parameter B<-bt=n> or B<--brace-tightness=n>. - - $obj->{ $parsed_sql->{ 'table' }[0] }; # -bt=0 - $obj->{ $parsed_sql->{'table'}[0] }; # -bt=1 (default) - $obj->{$parsed_sql->{'table'}[0]}; # -bt=2 - -And finally, curly braces which contain blocks of code are controlled by the -parameter B<-bbt=n> or B<--block-brace-tightness=n> as illustrated in the -example below. - - %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default) - %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.'; # -bbt=1 - %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.'; # -bbt=2 - -=item B<-sts>, B<--space-terminal-semicolon> - -Some programmers prefer a space before all terminal semicolons. The -default is for no such space, and is indicated with B<-nsts> or -B<--nospace-terminal-semicolon>. - - $i = 1 ; # -sts - $i = 1; # -nsts (default) - -=item B<-sfs>, B<--space-for-semicolon> - -Semicolons within B loops may sometimes be hard to see, -particularly when commas are also present. This option places spaces on -both sides of these special semicolons, and is the default. Use -B<-nsfs> or B<--nospace-for-semicolon> to deactivate it. - - for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) { # -sfs (default) - for ( @a = @$ap, $u = shift @a; @a; $u = $v ) { # -nsfs - -=item B<-asc>, B<--add-semicolons> - -Setting B<-asc> allows perltidy to add any missing optional semicolon at the end -of a line which is followed by a closing curly brace on the next line. This -is the default, and may be deactivated with B<-nasc> or B<--noadd-semicolons>. - -=item B<-dsm>, B<--delete-semicolons> - -Setting B<-dsm> allows perltidy to delete extra semicolons which are -simply empty statements. This is the default, and may be deactivated -with B<-ndsm> or B<--nodelete-semicolons>. (Such semicolons are not -deleted, however, if they would promote a side comment to a block -comment). - -=item B<-aws>, B<--add-whitespace> - -Setting this option allows perltidy to add certain whitespace improve -code readability. This is the default. If you do not want any -whitespace added, but are willing to have some whitespace deleted, use -B<-naws>. (Use B<-fws> to leave whitespace completely unchanged). - -=item B<-dws>, B<--delete-old-whitespace> - -Setting this option allows perltidy to remove some old whitespace -between characters, if necessary. This is the default. If you -do not want any old whitespace removed, use B<-ndws> or -B<--nodelete-old-whitespace>. - -=item Detailed whitespace controls around tokens - -For those who want more detailed control over the whitespace around -tokens, there are four parameters which can directly modify the default -whitespace rules built into perltidy for any token. They are: - -B<-wls=s> or B<--want-left-space=s>, - -B<-nwls=s> or B<--nowant-left-space=s>, - -B<-wrs=s> or B<--want-right-space=s>, - -B<-nwrs=s> or B<--nowant-right-space=s>. - -These parameters are each followed by a quoted string, B, containing a -list of token types. No more than one of each of these parameters -should be specified, because repeating a command-line parameter -always overwrites the previous one before perltidy ever sees it. - -To illustrate how these are used, suppose it is desired that there be no -space on either side of the token types B<= + - / *>. The following two -parameters would specify this desire: - - -nwls="= + - / *" -nwrs="= + - / *" - -(Note that the token types are in quotes, and that they are separated by -spaces). With these modified whitespace rules, the following line of math: - - $root = -$b + sqrt( $b * $b - 4. * $a * $c ) / ( 2. * $a ); - -becomes this: - - $root=-$b+sqrt( $b*$b-4.*$a*$c )/( 2.*$a ); - -These parameters should be considered to be hints to perltidy rather -than fixed rules, because perltidy must try to resolve conflicts that -arise between them and all of the other rules that it uses. One -conflict that can arise is if, between two tokens, the left token wants -a space and the right one doesn't. In this case, the token not wanting -a space takes priority. - -It is necessary to have a list of all token types in order to create -this type of input. Such a list can be obtained by the command -B<--dump-token-types>. Also try the B<-D> flag on a short snippet of code -and look at the .DEBUG file to see the tokenization. - -B Be sure to put these tokens in quotes to avoid having them -misinterpreted by your command shell. - -=item Space between specific keywords and opening paren - -When an opening paren follows a Perl keyword, no space is introduced after the -keyword, unless it is (by default) one of these: - - my local our and or eq ne if else elsif until unless - while for foreach return switch case given when - -These defaults can be modified with two commands: - -B<-sak=s> or B<--space-after-keyword=s> adds keywords. - -B<-nsak=s> or B<--nospace-after-keyword=s> removes keywords. - -where B is a list of keywords (in quotes if necessary). For example, - - my ( $a, $b, $c ) = @_; # default - my( $a, $b, $c ) = @_; # -nsak="my local our" - -To put a space after all keywords, see the next item. - -=item Space between all keywords and opening parens - -When an opening paren follows a function or keyword, no space is introduced -after the keyword except for the keywords noted in the previous item. To -always put a space between a function or keyword and its opening paren, -use the command: - -B<-skp> or B<--space-keyword-paren> - -You will probably also want to use the flag B<-sfp> (next item) too. - -=item Space between all function names and opening parens - -When an opening paren follows a function the default is not to introduce -a space. To cause a space to be introduced use: - -B<-sfp> or B<--space-function-paren> - - myfunc( $a, $b, $c ); # default - myfunc ( $a, $b, $c ); # -sfp - -You will probably also want to use the flag B<-skp> (previous item) too. - -=item Trimming whitespace around C quotes - -B<-tqw> or B<--trim-qw> provide the default behavior of trimming -spaces around multi-line C quotes and indenting them appropriately. - -B<-ntqw> or B<--notrim-qw> cause leading and trailing whitespace around -multi-line C quotes to be left unchanged. This option will not -normally be necessary, but was added for testing purposes, because in -some versions of perl, trimming C quotes changes the syntax tree. - -=back - -=head2 Comment Controls - -Perltidy has a number of ways to control the appearance of both block comments -and side comments. The term B here refers to a full-line -comment, whereas B will refer to a comment which appears on a -line to the right of some code. - -=over 4 - -=item B<-ibc>, B<--indent-block-comments> - -Block comments normally look best when they are indented to the same -level as the code which follows them. This is the default behavior, but -you may use B<-nibc> to keep block comments left-justified. Here is an -example: - - # this comment is indented (-ibc, default) - if ($task) { yyy(); } - -The alternative is B<-nibc>: - - # this comment is not indented (-nibc) - if ($task) { yyy(); } - -See also the next item, B<-isbc>, as well as B<-sbc>, for other ways to -have some indented and some outdented block comments. - -=item B<-isbc>, B<--indent-spaced-block-comments> - -If there is no leading space on the line, then the comment will not be -indented, and otherwise it may be. - -If both B<-ibc> and B<-isbc> are set, then B<-isbc> takes priority. - -=item B<-olc>, B<--outdent-long-comments> - -When B<-olc> is set, lines which are full-line (block) comments longer -than the value B will have their indentation -removed. This is the default; use B<-nolc> to prevent outdenting. - -=item B<-msc=n>, B<--minimum-space-to-comment=n> - -Side comments look best when lined up several spaces to the right of -code. Perltidy will try to keep comments at least n spaces to the -right. The default is n=4 spaces. - -=item B<-fpsc=n>, B<--fixed-position-side-comment=n> - -This parameter tells perltidy to line up side comments in column number B -whenever possible. The default, n=0, is not do do this. - -=item B<-hsc>, B<--hanging-side-comments> - -By default, perltidy tries to identify and align "hanging side -comments", which are something like this: - - my $IGNORE = 0; # This is a side comment - # This is a hanging side comment - # And so is this - -A comment is considered to be a hanging side comment if (1) it immediately -follows a line with a side comment, or another hanging side comment, and -(2) there is some leading whitespace on the line. -To deactivate this feature, use B<-nhsc> or B<--nohanging-side-comments>. -If block comments are preceded by a blank line, or have no leading -whitespace, they will not be mistaken as hanging side comments. - -=item Closing Side Comments - -A closing side comment is a special comment which perltidy can -automatically create and place after the closing brace of a code block. -They can be useful for code maintenance and debugging. The command -B<-csc> (or B<--closing-side-comments>) adds or updates closing side -comments. For example, here is a small code snippet - - sub message { - if ( !defined( $_[0] ) ) { - print("Hello, World\n"); - } - else { - print( $_[0], "\n" ); - } - } - -And here is the result of processing with C: - - sub message { - if ( !defined( $_[0] ) ) { - print("Hello, World\n"); - } - else { - print( $_[0], "\n" ); - } - } ## end sub message - -A closing side comment was added for C in this case, but not -for the C and C blocks, because they were below the 6 line -cutoff limit for adding closing side comments. This limit may be -changed with the B<-csci> command, described below. - -The command B<-dcsc> (or B<--delete-closing-side-comments>) reverses this -process and removes these comments. - -Several commands are available to modify the behavior of these two basic -commands, B<-csc> and B<-dcsc>: - -=over 4 - -=item B<-csci=n>, or B<--closing-side-comment-interval=n> - -where C is the minimum number of lines that a block must have in -order for a closing side comment to be added. The default value is -C. To illustrate: - - # perltidy -csci=2 -csc - sub message { - if ( !defined( $_[0] ) ) { - print("Hello, World\n"); - } ## end if ( !defined( $_[0] )) - else { - print( $_[0], "\n" ); - } ## end else [ if ( !defined( $_[0] )) - } ## end sub message - -Now the C and C blocks are commented. However, now this has -become very cluttered. - -=item B<-cscp=string>, or B<--closing-side-comment-prefix=string> - -where string is the prefix used before the name of the block type. The -default prefix, shown above, is C<## end>. This string will be added to -closing side comments, and it will also be used to recognize them in -order to update, delete, and format them. Any comment identified as a -closing side comment will be placed just a single space to the right of -its closing brace. - -=item B<-cscl=string>, or B<--closing-side-comment-list-string> - -where C is a list of block types to be tagged with closing side -comments. By default, all code block types preceded by a keyword or -label (such as C, C, and so on) will be tagged. The B<-cscl> -command changes the default list to be any selected block types; see -L. -For example, the following command -requests that only C's, labels, C, and C blocks be -affected by any B<-csc> or B<-dcsc> operation: - - -cscl="sub : BEGIN END" - -=item B<-csct=n>, or B<--closing-side-comment-maximum-text=n> - -The text appended to certain block types, such as an C block, is -whatever lies between the keyword introducing the block, such as C, -and the opening brace. Since this might be too much text for a side -comment, there needs to be a limit, and that is the purpose of this -parameter. The default value is C, meaning that no additional -tokens will be appended to this text after its length reaches 20 -characters. Omitted text is indicated with C<...>. (Tokens, including -sub names, are never truncated, however, so actual lengths may exceed -this). To illustrate, in the above example, the appended text of the -first block is C< ( !defined( $_[0] )...>. The existing limit of -C caused this text to be truncated, as indicated by the C<...>. - -=item B<-csce=n>, or B<--closing-side-comment-else-flag=n> - -The default, B, places the text of the opening C statement after any -terminal C. - -If B is used, then each C is also given the text of the opening -C statement. Also, an C will include the text of a preceding -C statement. Note that this may result some long closing -side comments. - -If B is used, the results will be the same as B whenever the -resulting line length is less than the maximum allowed. - -=item B<-cscw>, or B<--closing-side-comment-warnings> - -This parameter is intended to help make the initial transition to the use of -closing side comments. -It causes two -things to happen if a closing side comment replaces an existing, different -closing side comment: first, an error message will be issued, and second, the -original side comment will be placed alone on a new specially marked comment -line for later attention. - -The intent is to avoid clobbering existing hand-written side comments -which happen to match the pattern of closing side comments. This flag -should only be needed on the first run with B<-csc>. - -=back - -B - -=over 4 - -=item * - -Closing side comments are only placed on lines terminated with a closing -brace. Certain closing styles, such as the use of cuddled elses -(B<-ce>), preclude the generation of some closing side comments. - -=item * - -Please note that adding or deleting of closing side comments takes -place only through the commands B<-csc> or B<-dcsc>. The other commands, -if used, merely modify the behavior of these two commands. - -=item * - -It is recommended that the B<-cscw> flag be used along with B<-csc> on -the first use of perltidy on a given file. This will prevent loss of -any existing side comment data which happens to have the csc prefix. - -=item * - -Once you use B<-csc>, you should continue to use it so that any -closing side comments remain correct as code changes. Otherwise, these -comments will become incorrect as the code is updated. - -=item * - -If you edit the closing side comments generated by perltidy, you must also -change the prefix to be different from the closing side comment prefix. -Otherwise, your edits will be lost when you rerun perltidy with B<-csc>. For -example, you could simply change C<## end> to be C<## End>, since the test is -case sensitive. You may also want to use the B<-ssc> flag to keep these -modified closing side comments spaced the same as actual closing side comments. - -=item * - -Temporarily generating closing side comments is a useful technique for -exploring and/or debugging a perl script, especially one written by someone -else. You can always remove them with B<-dcsc>. - -=back - -=item Static Block Comments - -Static block comments are block comments with a special leading pattern, -C<##> by default, which will be treated slightly differently from other -block comments. They effectively behave as if they had glue along their -left and top edges, because they stick to the left edge and previous line -when there is no blank spaces in those places. This option is -particularly useful for controlling how commented code is displayed. - -=over 4 - -=item B<-sbc>, B<--static-block-comments> - -When B<-sbc> is used, a block comment with a special leading pattern, C<##> by -default, will be treated specially. - -Comments so identified are treated as follows: - -=over 4 - -=item * - -If there is no leading space on the line, then the comment will not -be indented, and otherwise it may be, - -=item * - -no new blank line will be -inserted before such a comment, and - -=item * - -such a comment will never become -a hanging side comment. - -=back - -For example, assuming C<@month_of_year> is -left-adjusted: - - @month_of_year = ( # -sbc (default) - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', - ## 'Dec', 'Nov' - 'Nov', 'Dec'); - -Without this convention, the above code would become - - @month_of_year = ( # -nsbc - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', - - ## 'Dec', 'Nov' - 'Nov', 'Dec' - ); - -which is not as clear. -The default is to use B<-sbc>. This may be deactivated with B<-nsbc>. - -=item B<-sbcp=string>, B<--static-block-comment-prefix=string> - -This parameter defines the prefix used to identify static block comments -when the B<-sbc> parameter is set. The default prefix is C<##>, -corresponding to C<-sbcp=##>. The prefix is actually part of a perl -pattern used to match lines and it must either begin with C<#> or C<^#>. -In the first case a prefix ^\s* will be added to match any leading -whitespace, while in the second case the pattern will match only -comments with no leading whitespace. For example, to -identify all comments as static block comments, one would use C<-sbcp=#>. -To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>. - -Please note that B<-sbcp> merely defines the pattern used to identify static -block comments; it will not be used unless the switch B<-sbc> is set. Also, -please be aware that since this string is used in a perl regular expression -which identifies these comments, it must enable a valid regular expression to -be formed. - -A pattern which can be useful is: - - -sbcp=^#{2,}[^\s#] - -This pattern requires a static block comment to have at least one character -which is neither a # nor a space. It allows a line containing only '#' -characters to be rejected as a static block comment. Such lines are often used -at the start and end of header information in subroutines and should not be -separated from the intervening comments, which typically begin with just a -single '#'. - -=item B<-osbc>, B<--outdent-static-block-comments> - -The command B<-osbc> will will cause static block comments to be outdented by 2 -spaces (or whatever B<-ci=n> has been set to), if possible. - -=back - -=item Static Side Comments - -Static side comments are side comments with a special leading pattern. -This option can be useful for controlling how commented code is displayed -when it is a side comment. - -=over 4 - -=item B<-ssc>, B<--static-side-comments> - -When B<-ssc> is used, a side comment with a static leading pattern, which is -C<##> by default, will be be spaced only a single space from previous -character, and it will not be vertically aligned with other side comments. - -The default is B<-nssc>. - -=item B<-sscp=string>, B<--static-side-comment-prefix=string> - -This parameter defines the prefix used to identify static side comments -when the B<-ssc> parameter is set. The default prefix is C<##>, -corresponding to C<-sscp=##>. - -Please note that B<-sscp> merely defines the pattern used to identify -static side comments; it will not be used unless the switch B<-ssc> is -set. Also, note that this string is used in a perl regular expression -which identifies these comments, so it must enable a valid regular -expression to be formed. - -=back - - -=back - -=head2 Skipping Selected Sections of Code - -Selected lines of code may be passed verbatim to the output without any -formatting. This feature is enabled by default but can be disabled with -the B<--noformat-skipping> or B<-nfs> flag. It should be used sparingly to -avoid littering code with markers, but it might be helpful for working -around occasional problems. For example it might be useful for keeping -the indentation of old commented code unchanged, keeping indentation of -long blocks of aligned comments unchanged, keeping certain list -formatting unchanged, or working around a glitch in perltidy. - -=over 4 - -=item B<-fs>, B<--format-skipping> - -This flag, which is enabled by default, causes any code between -special beginning and ending comment markers to be passed to the -output without formatting. The default beginning marker is #<<< -and the default ending marker is #>>> but they -may be changed (see next items below). Additional text may appear on -these special comment lines provided that it is separated from the -marker by at least one space. For example - - #<<< do not let perltidy touch this - my @list = (1, - 1, 1, - 1, 2, 1, - 1, 3, 3, 1, - 1, 4, 6, 4, 1,); - #>>> - -The comment markers may be placed at any location that a block comment may -appear. If they do not appear to be working, use the -log flag and examine the -F<.LOG> file. Use B<-nfs> to disable this feature. - -=item B<-fsb=string>, B<--format-skipping-begin=string> - -The B<-fsb=string> parameter may be used to change the beginning marker for -format skipping. The default is equivalent to -fsb='#<<<'. The string that -you enter must begin with a # and should be in quotes as necessary to get past -the command shell of your system. It is actually the leading text of a pattern -that is constructed by appending a '\s', so you must also include backslashes -for characters to be taken literally rather than as patterns. - -Some examples show how example strings become patterns: - - -fsb='#\{\{\{' becomes /^#\{\{\{\s/ which matches #{{{ but not #{{{{ - -fsb='#\*\*' becomes /^#\*\*\s/ which matches #** but not #*** - -fsb='#\*{2,}' becomes /^#\*{2,}\s/ which matches #** and #***** - -=item B<-fse=string>, B<--format-skipping-end=string> - -The B<-fsb=string> is the corresponding parameter used to change the -ending marker for format skipping. The default is equivalent to --fse='#<<<'. - -=back - -=head2 Line Break Control - -=over 4 - -=item B<-fnl>, B<--freeze-newlines> - -If you do not want any changes to the line breaks in your script, set -B<-fnl>, and they will remain fixed, and the rest of the commands in -this section and sections -L, -L, and -L will be ignored. You may want to use B<-noll> -with this. - -=item B<-ce>, B<--cuddled-else> - -Enable the "cuddled else" style, in which C and C are -follow immediately after the curly brace closing the previous block. -The default is not to use cuddled elses, and is indicated with the flag -B<-nce> or B<--nocuddled-else>. Here is a comparison of the -alternatives: - - if ($task) { - yyy(); - } else { # -ce - zzz(); - } - - if ($task) { - yyy(); - } - else { # -nce (default) - zzz(); - } - -=item B<-bl>, B<--opening-brace-on-new-line> - -Use the flag B<-bl> to place the opening brace on a new line: - - if ( $input_file eq '-' ) # -bl - { - important_function(); - } - -This flag applies to all structural blocks, including sub's (unless -the B<-sbl> flag is set -- see next item). - -The default style, B<-nbl>, places an opening brace on the same line as -the keyword introducing it. For example, - - if ( $input_file eq '-' ) { # -nbl (default) - -=item B<-sbl>, B<--opening-sub-brace-on-new-line> - -The flag B<-sbl> can be used to override the value of B<-bl> for -opening sub braces. For example, - - perltidy -sbl - -produces this result: - - sub message - { - if (!defined($_[0])) { - print("Hello, World\n"); - } - else { - print($_[0], "\n"); - } - } - -This flag is negated with B<-nsbl>. If B<-sbl> is not specified, -the value of B<-bl> is used. - -=item B<-bli>, B<--brace-left-and-indent> - -The flag B<-bli> is the same as B<-bl> but in addition it causes one -unit of continuation indentation ( see B<-ci> ) to be placed before -an opening and closing block braces. - -For example, - - if ( $input_file eq '-' ) # -bli - { - important_function(); - } - -By default, this extra indentation occurs for blocks of type: -B, B, B, B, B, B, B, -B, B, and also with a preceding label. The next item -shows how to change this. - -=item B<-blil=s>, B<--brace-left-and-indent-list=s> - -Use this parameter to change the types of block braces for which the -B<-bli> flag applies; see L. For example, -B<-blil='if elsif else'> would apply it to only C blocks. - -=item B<-bar>, B<--opening-brace-always-on-right> - -The default style, B<-nbl> places the opening code block brace on a new -line if it does not fit on the same line as the opening keyword, like -this: - - if ( $bigwasteofspace1 && $bigwasteofspace2 - || $bigwasteofspace3 && $bigwasteofspace4 ) - { - big_waste_of_time(); - } - -To force the opening brace to always be on the right, use the B<-bar> -flag. In this case, the above example becomes - - if ( $bigwasteofspace1 && $bigwasteofspace2 - || $bigwasteofspace3 && $bigwasteofspace4 ) { - big_waste_of_time(); - } - -A conflict occurs if both B<-bl> and B<-bar> are specified. - -=item B<-otr>, B<--opening-token-right> and related flags - -The B<-otr> flag is a hint that perltidy should not place a break between a -comma and an opening token. For example: - - # default formatting - push @{ $self->{$module}{$key} }, - { - accno => $ref->{accno}, - description => $ref->{description} - }; - - # perltidy -otr - push @{ $self->{$module}{$key} }, { - accno => $ref->{accno}, - description => $ref->{description} - }; - -The flag B<-otr> is actually a synonym for three other flags -which can be used to control parens, hash braces, and square brackets -separately if desired: - - -opr or --opening-paren-right - -ohbr or --opening-hash-brace-right - -osbr or --opening-square-bracket-right - -=item Vertical tightness of non-block curly braces, parentheses, and square brackets. - -These parameters control what shall be called vertical tightness. Here are the -main points: - -=over 4 - -=item * - -Opening tokens (except for block braces) are controlled by B<-vt=n>, or -B<--vertical-tightness=n>, where - - -vt=0 always break a line after opening token (default). - -vt=1 do not break unless this would produce more than one - step in indentation in a line. - -vt=2 never break a line after opening token - -=item * - -You must also use the B<-lp> flag when you use the B<-vt> flag; the -reason is explained below. - -=item * - -Closing tokens (except for block braces) are controlled by B<-vtc=n>, or -B<--vertical-tightness-closing=n>, where - - -vtc=0 always break a line before a closing token (default), - -vtc=1 do not break before a closing token which is followed - by a semicolon or another closing token, and is not in - a list environment. - -vtc=2 never break before a closing token. - -The rules for B<-vtc=1> are designed to maintain a reasonable balance -between tightness and readability in complex lists. - -=item * - -Different controls may be applied to to different token types, -and it is also possible to control block braces; see below. - -=item * - -Finally, please note that these vertical tightness flags are merely -hints to the formatter, and it cannot always follow them. Things which -make it difficult or impossible include comments, blank lines, blocks of -code within a list, and possibly the lack of the B<-lp> parameter. -Also, these flags may be ignored for very small lists (2 or 3 lines in -length). - -=back - -Here are some examples: - - # perltidy -lp -vt=0 -vtc=0 - %romanNumerals = ( - one => 'I', - two => 'II', - three => 'III', - four => 'IV', - ); - - # perltidy -lp -vt=1 -vtc=0 - %romanNumerals = ( one => 'I', - two => 'II', - three => 'III', - four => 'IV', - ); - - # perltidy -lp -vt=1 -vtc=1 - %romanNumerals = ( one => 'I', - two => 'II', - three => 'III', - four => 'IV', ); - -The difference between B<-vt=1> and B<-vt=2> is shown here: - - # perltidy -lp -vt=1 - $init->add( - mysprintf( "(void)find_threadsv(%s);", - cstring( $threadsv_names[ $op->targ ] ) - ) - ); - - # perltidy -lp -vt=2 - $init->add( mysprintf( "(void)find_threadsv(%s);", - cstring( $threadsv_names[ $op->targ ] ) - ) - ); - -With B<-vt=1>, the line ending in C does not combine with the next -line because the next line is not balanced. This can help with -readability, but B<-vt=2> can be used to ignore this rule. - -The tightest, and least readable, code is produced with both C<-vt=2> and -C<-vtc=2>: - - # perltidy -lp -vt=2 -vtc=2 - $init->add( mysprintf( "(void)find_threadsv(%s);", - cstring( $threadsv_names[ $op->targ ] ) ) ); - -Notice how the code in all of these examples collapses vertically as -B<-vt> increases, but the indentation remains unchanged. This is -because perltidy implements the B<-vt> parameter by first formatting as -if B<-vt=0>, and then simply overwriting one output line on top of the -next, if possible, to achieve the desired vertical tightness. The -B<-lp> indentation style has been designed to allow this vertical -collapse to occur, which is why it is required for the B<-vt> parameter. - -The B<-vt=n> and B<-vtc=n> parameters apply to each type of container -token. If desired, vertical tightness controls can be applied -independently to each of the closing container token types. - -The parameters for controlling parentheses are B<-pvt=n> or -B<--paren-vertical-tightness=n>, and B<-pcvt=n> or -B<--paren-vertical-tightness-closing=n>. - -Likewise, the parameters for square brackets are B<-sbvt=n> or -B<--square-bracket-vertical-tightness=n>, and B<-sbcvt=n> or -B<--square-bracket-vertical-tightness-closing=n>. - -Finally, the parameters for controlling non-code block braces are -B<-bvt=n> or B<--brace-vertical-tightness=n>, and B<-bcvt=n> or -B<--brace-vertical-tightness-closing=n>. - -In fact, the parameter B<-vt=n> is actually just an abbreviation for -B<-pvt=n -bvt=n sbvt=n>, and likewise B<-vtc=n> is an abbreviation -for B<-pvtc=n -bvtc=n sbvtc=n>. - -=item B<-bbvt=n> or B<--block-brace-vertical-tightness=n> - -The B<-bbvt=n> flag is just like the B<-vt=n> flag but applies -to opening code block braces. - - -bbvt=0 break after opening block brace (default). - -bbvt=1 do not break unless this would produce more than one - step in indentation in a line. - -bbvt=2 do not break after opening block brace. - -It is necessary to also use either B<-bl> or B<-bli> for this to work, -because, as with other vertical tightness controls, it is implemented by -simply overwriting a line ending with an opening block brace with the -subsequent line. For example: - - # perltidy -bli -bbvt=0 - if ( open( FILE, "< $File" ) ) - { - while ( $File = ) - { - $In .= $File; - $count++; - } - close(FILE); - } - - # perltidy -bli -bbvt=1 - if ( open( FILE, "< $File" ) ) - { while ( $File = ) - { $In .= $File; - $count++; - } - close(FILE); - } - -By default this applies to blocks associated with keywords B, -B, B, B, B, B, B, B, -B, and also with a preceding label. This can be changed with -the parameter B<-bbvtl=string>, or -B<--block-brace-vertical-tightness-list=string>, where B is a -space-separated list of block types. For more information on the -possible values of this string, see L - -For example, if we want to just apply this style to C, -C, and C blocks, we could use -C. - -There is no vertical tightness control for closing block braces; with -the exception of one-line blocks, they will normally remain on a -separate line. - -=item B<-sot>, B<--stack-opening-tokens> and related flags - -The B<-sot> flag tells perltidy to "stack" opening tokens -when possible to avoid lines with isolated opening tokens. - -For example: - - # default - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } - ); - - # -sot - $opt_c = Text::CSV_XS->new( { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } - ); - -For detailed control of individual closing tokens the following -controls can be used: - - -sop or --stack-opening-paren - -sohb or --stack-opening-hash-brace - -sosb or --stack-opening-square-bracket - -The flag B<-sot> is a synonym for B<-sop -sohb -sosb>. - -=item B<-sct>, B<--stack-closing-tokens> and related flags - -The B<-sct> flag tells perltidy to "stack" closing tokens -when possible to avoid lines with isolated closing tokens. - -For example: - - # default - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } - ); - - # -sct - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } ); - -The B<-sct> flag is somewhat similar to the B<-vtc> flags, and in some -cases it can give a similar result. The difference is that the B<-vtc> -flags try to avoid lines with leading opening tokens by "hiding" them at -the end of a previous line, whereas the B<-sct> flag merely tries to -reduce the number of lines with isolated closing tokens by stacking them -but does not try to hide them. For example: - - # -vtc=2 - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, } ); - -For detailed control of the stacking of individual closing tokens the -following controls can be used: - - -scp or --stack-closing-paren - -schb or --stack-closing-hash-brace - -scsb or --stack-closing-square-bracket - -The flag B<-sct> is a synonym for B<-scp -schb -scsb>. - -=item B<-dnl>, B<--delete-old-newlines> - -By default, perltidy first deletes all old line break locations, and then it -looks for good break points to match the desired line length. Use B<-ndnl> -or B<--nodelete-old-newlines> to force perltidy to retain all old line break -points. - -=item B<-anl>, B<--add-newlines> - -By default, perltidy will add line breaks when necessary to create -continuations of long lines and to improve the script appearance. Use -B<-nanl> or B<--noadd-newlines> to prevent any new line breaks. - -This flag does not prevent perltidy from eliminating existing line -breaks; see B<--freeze-newlines> to completely prevent changes to line -break points. - -=item Controlling whether perltidy breaks before or after operators - -Four command line parameters provide some control over whether -a line break should be before or after specific token types. -Two parameters give detailed control: - -B<-wba=s> or B<--want-break-after=s>, and - -B<-wbb=s> or B<--want-break-before=s>. - -These parameters are each followed by a quoted string, B, containing -a list of token types (separated only by spaces). No more than one of each -of these parameters should be specified, because repeating a -command-line parameter always overwrites the previous one before -perltidy ever sees it. - -By default, perltidy breaks B these token types: - % + - * / x != == >= <= =~ !~ < > | & - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - -And perltidy breaks B these token types by default: - . << >> -> && || // - -To illustrate, to cause a break after a concatenation operator, C<'.'>, -rather than before it, the command line would be - - -wba="." - -As another example, the following command would cause a break before -math operators C<'+'>, C<'-'>, C<'/'>, and C<'*'>: - - -wbb="+ - / *" - -These commands should work well for most of the token types that perltidy uses -(use B<--dump-token-types> for a list). Also try the B<-D> flag on a short -snippet of code and look at the .DEBUG file to see the tokenization. However, -for a few token types there may be conflicts with hardwired logic which cause -unexpected results. One example is curly braces, which should be controlled -with the parameter B provided for that purpose. - -B Be sure to put these tokens in quotes to avoid having them -misinterpreted by your command shell. - -Two additional parameters are available which, though they provide no further -capability, can simplify input are: - -B<-baao> or B<--break-after-all-operators>, - -B<-bbao> or B<--break-before-all-operators>. - -The -baao sets the default to be to break after all of the following operators: - - % + - * / x != == >= <= =~ !~ < > | & - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - . : ? && || and or err xor - -and the B<-bbao> flag sets the default to break before all of these operators. -These can be used to define an initial break preference which can be fine-tuned -with the B<-wba> and B<-wbb> flags. For example, to break before all operators -except an B<=> one could use --bbao -wba='=' rather than listing every -single perl operator except B<=> on a -wbb flag. - -=back - -=head2 Controlling List Formatting - -Perltidy attempts to place comma-separated arrays of values in tables -which look good. Its default algorithms usually work well, and they -have been improving with each release, but several parameters are -available to control list formatting. - -=over 4 - -=item B<-boc>, B<--break-at-old-comma-breakpoints> - -This flag tells perltidy to try to break at all old commas. This is not -the default. Normally, perltidy makes a best guess at list formatting, -and seldom uses old comma breakpoints. Usually this works well, -but consider: - - my @list = (1, - 1, 1, - 1, 2, 1, - 1, 3, 3, 1, - 1, 4, 6, 4, 1,); - -The default formatting will flatten this down to one line: - - # perltidy (default) - my @list = ( 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 4, 6, 4, 1, ); - -which hides the structure. Using B<-boc>, plus additional flags -to retain the original style, yields - - # perltidy -boc -lp -pt=2 -vt=1 -vtc=1 - my @list = (1, - 1, 1, - 1, 2, 1, - 1, 3, 3, 1, - 1, 4, 6, 4, 1,); - -A disadvantage of this flag is that all tables in the file -must already be nicely formatted. For another possibility see -the -fs flag in L. - -=item B<-mft=n>, B<--maximum-fields-per-table=n> - -If the computed number of fields for any table exceeds B, then it -will be reduced to B. The default value for B is a large number, -40. While this value should probably be left unchanged as a general -rule, it might be used on a small section of code to force a list to -have a particular number of fields per line, and then either the B<-boc> -flag could be used to retain this formatting, or a single comment could -be introduced somewhere to freeze the formatting in future applications -of perltidy. - - # perltidy -mft=2 - @month_of_year = ( - 'Jan', 'Feb', - 'Mar', 'Apr', - 'May', 'Jun', - 'Jul', 'Aug', - 'Sep', 'Oct', - 'Nov', 'Dec' - ); - -=item B<-cab=n>, B<--comma-arrow-breakpoints=n> - -A comma which follows a comma arrow, '=>', requires special -consideration. In a long list, it is common to break at all such -commas. This parameter can be used to control how perltidy breaks at -these commas. (However, it will have no effect if old comma breaks are -being forced because B<-boc> is used). The possible values of B are: - - n=0 break at all commas after => - n=1 stable: break at all commas after => unless this would break - an existing one-line container (default) - n=2 break at all commas after =>, but try to form the maximum - maximum one-line container lengths - n=3 do not treat commas after => specially at all - -For example, given the following single line, perltidy by default will -not add any line breaks because it would break the existing one-line -container: - - bless { B => $B, Root => $Root } => $package; - -Using B<-cab=0> will force a break after each comma-arrow item: - - # perltidy -cab=0: - bless { - B => $B, - Root => $Root - } => $package; - -If perltidy is subsequently run with this container broken, then by -default it will break after each '=>' because the container is now -broken. To reform a one-line container, the parameter B<-cab=2> would -be needed. - -The flag B<-cab=3> can be used to prevent these commas from being -treated specially. In this case, an item such as "01" => 31 is -treated as a single item in a table. The number of fields in this table -will be determined by the same rules that are used for any other table. -Here is an example. - - # perltidy -cab=3 - my %last_day = ( - "01" => 31, "02" => 29, "03" => 31, "04" => 30, - "05" => 31, "06" => 30, "07" => 31, "08" => 31, - "09" => 30, "10" => 31, "11" => 30, "12" => 31 - ); - -=back - -=head2 Retaining or Ignoring Existing Line Breaks - -Several additional parameters are available for controlling the extent -to which line breaks in the input script influence the output script. -In most cases, the default parameter values are set so that, if a choice -is possible, the output style follows the input style. For example, if -a short logical container is broken in the input script, then the -default behavior is for it to remain broken in the output script. - -Most of the parameters in this section would only be required for a -one-time conversion of a script from short container lengths to longer -container lengths. The opposite effect, of converting long container -lengths to shorter lengths, can be obtained by temporarily using a short -maximum line length. - -=over 4 - -=item B<-bol>, B<--break-at-old-logical-breakpoints> - -By default, if a logical expression is broken at a C<&&>, C<||>, C, -or C, then the container will remain broken. Also, breaks -at internal keywords C and C will normally be retained. -To prevent this, and thus form longer lines, use B<-nbol>. - -=item B<-bok>, B<--break-at-old-keyword-breakpoints> - -By default, perltidy will retain a breakpoint before keywords which may -return lists, such as C and . This allows chains of these -operators to be displayed one per line. Use B<-nbok> to prevent -retaining these breakpoints. - -=item B<-bot>, B<--break-at-old-ternary-breakpoints> - -By default, if a conditional (ternary) operator is broken at a C<:>, -then it will remain broken. To prevent this, and thereby -form longer lines, use B<-nbot>. - -=item B<-iob>, B<--ignore-old-breakpoints> - -Use this flag to tell perltidy to ignore existing line breaks to the -maximum extent possible. This will tend to produce the longest possible -containers, regardless of type, which do not exceed the line length -limit. - -=item B<-kis>, B<--keep-interior-semicolons> - -Use the B<-kis> flag to prevent breaking at a semicolon if -there was no break there in the input file. Normally -perltidy places a newline after each semicolon which -terminates a statement unless several statements are -contained within a one-line brace block. To illustrate, -consider the following input lines: - - dbmclose(%verb_delim); undef %verb_delim; - dbmclose(%expanded); undef %expanded; - -The default is to break after each statement, giving - - dbmclose(%verb_delim); - undef %verb_delim; - dbmclose(%expanded); - undef %expanded; - -With B the multiple statements are retained: - - dbmclose(%verb_delim); undef %verb_delim; - dbmclose(%expanded); undef %expanded; - -The statements are still subject to the specified value -of B and will be broken if this -maximum is exceeed. - -=back - -=head2 Blank Line Control - -Blank lines can improve the readability of a script if they are carefully -placed. Perltidy has several commands for controlling the insertion, -retention, and removal of blank lines. - -=over 4 - -=item B<-bbc>, B<--blanks-before-comments> - -A blank line will be introduced before a full-line comment. This is the -default. Use B<-nbbc> or B<--noblanks-before-comments> to prevent -such blank lines from being introduced. - -=item B<-bbs>, B<--blanks-before-subs> - -A blank line will be introduced before a B definition, unless it is a -one-liner or preceded by a comment. A blank line will also be introduced -before a B statement and a B and B block. This is the -default. The intention is to help display the structure of a program by -setting off certain key sections of code. This is negated with B<-nbbs> or -B<--noblanks-before-subs>. - -=item B<-bbb>, B<--blanks-before-blocks> - -A blank line will be introduced before blocks of coding delimited by -B, B, B, B, and B, B, in the following -circumstances: - -=over 4 - -=item * - -The block is not preceded by a comment. - -=item * - -The block is not a one-line block. - -=item * - -The number of consecutive non-blank lines at the current indentation depth is at least B<-lbl> -(see next section). - -=back - -This is the default. The intention of this option is to introduce -some space within dense coding. -This is negated with B<-nbbb> or B<--noblanks-before-blocks>. - -=item B<-lbl=n> B<--long-block-line-count=n> - -This controls how often perltidy is allowed to add blank lines before -certain block types (see previous section). The default is 8. Entering -a value of B<0> is equivalent to entering a very large number. - -=item B<-mbl=n> B<--maximum-consecutive-blank-lines=n> - -This parameter specifies the maximum number of consecutive blank lines -in the output script. The default is n=1. If the input file has more -than n consecutive blank lines, the number will be reduced to n. -(This obviously does not apply to pod sections, here-documents, and quotes). - -=item B<-sob>, B<--swallow-optional-blank-lines> - -All blank lines not required by the above flags, B<-bbb>, B<-bbs>, and B<-bbc>, -will be deleted. (But essential blank lines above pod documents will be -retained). This is NOT the default. - -=item B<-nsob>, B<--noswallow-optional-blank-lines> - -Retain blank lines, including those which do not corresponding to flags -B<-bbb>, B<-bbs>, and B<-bbc>. This is the default. The number of -blanks retained is subject to the limit imposed by -B<--maximum-consecutive-blank-lines>, however. - -=back - -=head2 Styles - -A style refers to a convenient collection of existing parameters. - -=over 4 - -=item B<-gnu>, B<--gnu-style> - -B<-gnu> gives an approximation to the GNU Coding Standards (which do -not apply to perl) as they are sometimes implemented. At present, this -style overrides the default style with the following parameters: - - -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp - -=item B<-pbp>, B<--perl-best-practices> - -B<-pbp> is an abbreviation for the parameters in the book B -by Damian Conway: - - -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq - -wbb="% + - * / x != == >= <= =~ !~ < > | & = - **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=" - -Note that the -st and -se flags make perltidy act as a filter on one file only. -These can be overridden with -nst and -nse if necessary. - -=back - -=head2 Other Controls - -=over 4 - -=item Deleting selected text - -Perltidy can selectively delete comments and/or pod documentation. The -command B<-dac> or B<--delete-all-comments> will delete all comments -B all pod documentation, leaving just code and any leading system -control lines. - -The command B<-dp> or B<--delete-pod> will remove all pod documentation -(but not comments). - -Two commands which remove comments (but not pod) are: B<-dbc> or -B<--delete-block-comments> and B<-dsc> or B<--delete-side-comments>. -(Hanging side comments will be deleted with block comments here.) - -The negatives of these commands also work, and are the defaults. When -block comments are deleted, any leading 'hash-bang' will be retained. -Also, if the B<-x> flag is used, any system commands before a leading -hash-bang will be retained (even if they are in the form of comments). - -=item Writing selected text to a file - -When perltidy writes a formatted text file, it has the ability to also -send selected text to a file with a F<.TEE> extension. This text can -include comments and pod documentation. - -The command B<-tac> or B<--tee-all-comments> will write all comments -B all pod documentation. - -The command B<-tp> or B<--tee-pod> will write all pod documentation (but -not comments). - -The commands which write comments (but not pod) are: B<-tbc> or -B<--tee-block-comments> and B<-tsc> or B<--tee-side-comments>. -(Hanging side comments will be written with block comments here.) - -The negatives of these commands also work, and are the defaults. - -=item Using a F<.perltidyrc> command file - -If you use perltidy frequently, you probably won't be happy until you -create a F<.perltidyrc> file to avoid typing commonly-used parameters. -Perltidy will first look in your current directory for a command file -named F<.perltidyrc>. If it does not find one, it will continue looking -for one in other standard locations. - -These other locations are system-dependent, and may be displayed with -the command C. Under Unix systems, it will first look -for an environment variable B. Then it will look for a -F<.perltidyrc> file in the home directory, and then for a system-wide -file F, and then it will look for -F. Note that these last two system-wide files do not -have a leading dot. Further system-dependent information will be found -in the INSTALL file distributed with perltidy. - -This file is free format, and simply a list of parameters, just as they -would be entered on a command line. Any number of lines may be used, -with any number of parameters per line, although it may be easiest to -read with one parameter per line. Blank lines are ignored, and text -after a '#' is ignored to the end of a line. - -Here is an example of a F<.perltidyrc> file: - - # This is a simple of a .perltidyrc configuration file - # This implements a highly spaced style - -se # errors to standard error output - -w # show all warnings - -bl # braces on new lines - -pt=0 # parens not tight at all - -bt=0 # braces not tight - -sbt=0 # square brackets not tight - -The parameters in the F<.perltidyrc> file are installed first, so any -parameters given on the command line will have priority over them. - -To avoid confusion, perltidy ignores any command in the .perltidyrc -file which would cause some kind of dump and an exit. These are: - - -h -v -ddf -dln -dop -dsn -dtt -dwls -dwrs -ss - -There are several options may be helpful in debugging a F<.perltidyrc> -file: - -=over 4 - -=item * - -A very helpful command is B<--dump-profile> or B<-dpro>. It writes a -list of all configuration filenames tested to standard output, and -if a file is found, it dumps the content to standard output before -exiting. So, to find out where perltidy looks for its configuration -files, and which one if any it selects, just enter - - perltidy -dpro - -=item * - -It may be simplest to develop and test configuration files with -alternative names, and invoke them with B<-pro=filename> on the command -line. Then rename the desired file to F<.perltidyrc> when finished. - -=item * - -The parameters in the F<.perltidyrc> file can be switched off with -the B<-npro> option. - -=item * - -The commands B<--dump-options>, B<--dump-defaults>, B<--dump-long-names>, -and B<--dump-short-names>, all described below, may all be helpful. - -=back - -=item Creating a new abbreviation - -A special notation is available for use in a F<.perltidyrc> file -for creating an abbreviation for a group -of options. This can be used to create a -shorthand for one or more styles which are frequently, but not always, -used. The notation is to group the options within curly braces which -are preceded by the name of the alias (without leading dashes), like this: - - newword { - -opt1 - -opt2 - } - -where B is the abbreviation, and B, etc, are existing parameters -I. The main syntax requirement is that -the new abbreviation must begin on a new line. -Space before and after the curly braces is optional. -For a -specific example, the following line - - airy {-bl -pt=0 -bt=0 -sbt=0} - -could be placed in a F<.perltidyrc> file, and then invoked at will with - - perltidy -airy somefile.pl - -(Either C<-airy> or C<--airy> may be used). - -=item Skipping leading non-perl commands with B<-x> or B<--look-for-hash-bang> - -If your script has leading lines of system commands or other text which -are not valid perl code, and which are separated from the start of the -perl code by a "hash-bang" line, ( a line of the form C<#!...perl> ), -you must use the B<-x> flag to tell perltidy not to parse and format any -lines before the "hash-bang" line. This option also invokes perl with a --x flag when checking the syntax. This option was originally added to -allow perltidy to parse interactive VMS scripts, but it should be used -for any script which is normally invoked with C. - -=item Making a file unreadable - -The goal of perltidy is to improve the readability of files, but there -are two commands which have the opposite effect, B<--mangle> and -B<--extrude>. They are actually -merely aliases for combinations of other parameters. Both of these -strip all possible whitespace, but leave comments and pod documents, -so that they are essentially reversible. The -difference between these is that B<--mangle> puts the fewest possible -line breaks in a script while B<--extrude> puts the maximum possible. -Note that these options do not provided any meaningful obfuscation, because -perltidy can be used to reformat the files. They were originally -developed to help test the tokenization logic of perltidy, but they -have other uses. -One use for B<--mangle> is the following: - - perltidy --mangle myfile.pl -st | perltidy -o myfile.pl.new - -This will form the maximum possible number of one-line blocks (see next -section), and can sometimes help clean up a badly formatted script. - -A similar technique can be used with B<--extrude> instead of B<--mangle> -to make the minimum number of one-line blocks. - -Another use for B<--mangle> is to combine it with B<-dac> to reduce -the file size of a perl script. - -=item One-line blocks - -There are a few points to note regarding one-line blocks. A one-line -block is something like this, - - if ($x > 0) { $y = 1 / $x } - -where the contents within the curly braces is short enough to fit -on a single line. - -With few exceptions, perltidy retains existing one-line blocks, if it -is possible within the line-length constraint, but it does not attempt -to form new ones. In other words, perltidy will try to follow the -one-line block style of the input file. - -If an existing one-line block is longer than the maximum line length, -however, it will be broken into multiple lines. When this happens, perltidy -checks for and adds any optional terminating semicolon (unless the B<-nasc> -option is used) if the block is a code block. - -The main exception is that perltidy will attempt to form new one-line -blocks following the keywords C, C, and C, because -these code blocks are often small and most clearly displayed in a single -line. - -One-line block rules can conflict with the cuddled-else option. When -the cuddled-else option is used, perltidy retains existing one-line -blocks, even if they do not obey cuddled-else formatting. - -Occasionally, when one-line blocks get broken because they exceed the -available line length, the formatting will violate the requested brace style. -If this happens, reformatting the script a second time should correct -the problem. - -=item Debugging - -The following flags are available for debugging: - -B<--dump-defaults> or B<-ddf> will write the default option set to standard output and quit - -B<--dump-profile> or B<-dpro> will write the name of the current -configuration file and its contents to standard output and quit. - -B<--dump-options> or B<-dop> will write current option set to standard -output and quit. - -B<--dump-long-names> or B<-dln> will write all command line long names (passed -to Get_options) to standard output and quit. - -B<--dump-short-names> or B<-dsn> will write all command line short names -to standard output and quit. - -B<--dump-token-types> or B<-dtt> will write a list of all token types -to standard output and quit. - -B<--dump-want-left-space> or B<-dwls> will write the hash %want_left_space -to standard output and quit. See the section on controlling whitespace -around tokens. - -B<--dump-want-right-space> or B<-dwrs> will write the hash %want_right_space -to standard output and quit. See the section on controlling whitespace -around tokens. - -B<-DEBUG> will write a file with extension F<.DEBUG> for each input file -showing the tokenization of all lines of code. - -=item Working with MakeMaker, AutoLoader and SelfLoader - -The first $VERSION line of a file which might be eval'd by MakeMaker -is passed through unchanged except for indentation. -Use B<--nopass-version-line>, or B<-npvl>, to deactivate this feature. - -If the AutoLoader module is used, perltidy will continue formatting -code after seeing an __END__ line. -Use B<--nolook-for-autoloader>, or B<-nlal>, to deactivate this feature. - -Likewise, if the SelfLoader module is used, perltidy will continue formatting -code after seeing a __DATA__ line. -Use B<--nolook-for-selfloader>, or B<-nlsl>, to deactivate this feature. - -=item Working around problems with older version of Perl - -Perltidy contains a number of rules which help avoid known subtleties -and problems with older versions of perl, and these rules always -take priority over whatever formatting flags have been set. For example, -perltidy will usually avoid starting a new line with a bareword, because -this might cause problems if C is active. - -There is no way to override these rules. - -=back - -=head1 HTML OPTIONS - -=over 4 - -=item The B<-html> master switch - -The flag B<-html> causes perltidy to write an html file with extension -F<.html>. So, for example, the following command - - perltidy -html somefile.pl - -will produce a syntax-colored html file named F -which may be viewed with a browser. - -B: In this case, perltidy does not do any formatting to the -input file, and it does not write a formatted file with extension -F<.tdy>. This means that two perltidy runs are required to create a -fully reformatted, html copy of a script. - -=item The B<-pre> flag for code snippets - -When the B<-pre> flag is given, only the pre-formatted section, within -the
 and 
tags, will be output. This simplifies inclusion -of the output in other files. The default is to output a complete -web page. - -=item The B<-nnn> flag for line numbering - -When the B<-nnn> flag is given, the output lines will be numbered. - -=item The B<-toc>, or B<--html-table-of-contents> flag - -By default, a table of contents to packages and subroutines will be -written at the start of html output. Use B<-ntoc> to prevent this. -This might be useful, for example, for a pod document which contains a -number of unrelated code snippets. This flag only influences the code -table of contents; it has no effect on any table of contents produced by -pod2html (see next item). - -=item The B<-pod>, or B<--pod2html> flag - -There are two options for formatting pod documentation. The default is -to pass the pod through the Pod::Html module (which forms the basis of -the pod2html utility). Any code sections are formatted by perltidy, and -the results then merged. Note: perltidy creates a temporary file when -Pod::Html is used; see L<"FILES">. Also, Pod::Html creates temporary -files for its cache. - -NOTE: Perltidy counts the number of C<=cut> lines, and either moves the -pod text to the top of the html file if there is one C<=cut>, or leaves -the pod text in its original order (interleaved with code) otherwise. - -Most of the flags accepted by pod2html may be included in the perltidy -command line, and they will be passed to pod2html. In some cases, -the flags have a prefix C to emphasize that they are for the -pod2html, and this prefix will be removed before they are passed to -pod2html. The flags which have the additional C prefix are: - - --[no]podheader --[no]podindex --[no]podrecurse --[no]podquiet - --[no]podverbose --podflush - -The flags which are unchanged from their use in pod2html are: - - --backlink=s --cachedir=s --htmlroot=s --libpods=s --title=s - --podpath=s --podroot=s - -where 's' is an appropriate character string. Not all of these flags are -available in older versions of Pod::Html. See your Pod::Html documentation for -more information. - -The alternative, indicated with B<-npod>, is not to use Pod::Html, but -rather to format pod text in italics (or whatever the stylesheet -indicates), without special html markup. This is useful, for example, -if pod is being used as an alternative way to write comments. - -=item The B<-frm>, or B<--frames> flag - -By default, a single html output file is produced. This can be changed -with the B<-frm> option, which creates a frame holding a table of -contents in the left panel and the source code in the right side. This -simplifies code browsing. Assume, for example, that the input file is -F. Then, for default file extension choices, these three -files will be created: - - MyModule.pm.html - the frame - MyModule.pm.toc.html - the table of contents - MyModule.pm.src.html - the formatted source code - -Obviously this file naming scheme requires that output be directed to a real -file (as opposed to, say, standard output). If this is not the -case, or if the file extension is unknown, the B<-frm> option will be -ignored. - -=item The B<-text=s>, or B<--html-toc-extension> flag - -Use this flag to specify the extra file extension of the table of contents file -when html frames are used. The default is "toc". -See L. - -=item The B<-sext=s>, or B<--html-src-extension> flag - -Use this flag to specify the extra file extension of the content file when html -frames are used. The default is "src". -See L. - -=item The B<-hent>, or B<--html-entities> flag - -This flag controls the use of Html::Entities for html formatting. By -default, the module Html::Entities is used to encode special symbols. -This may not be the right thing for some browser/language -combinations. Use --nohtml-entities or -nhent to prevent this. - -=item Style Sheets - -Style sheets make it very convenient to control and adjust the -appearance of html pages. The default behavior is to write a page of -html with an embedded style sheet. - -An alternative to an embedded style sheet is to create a page with a -link to an external style sheet. This is indicated with the -B<-css=filename>, where the external style sheet is F. The -external style sheet F will be created if and only if it does -not exist. This option is useful for controlling multiple pages from a -single style sheet. - -To cause perltidy to write a style sheet to standard output and exit, -use the B<-ss>, or B<--stylesheet>, flag. This is useful if the style -sheet could not be written for some reason, such as if the B<-pre> flag -was used. Thus, for example, - - perltidy -html -ss >mystyle.css - -will write a style sheet with the default properties to file -F. - -The use of style sheets is encouraged, but a web page without a style -sheets can be created with the flag B<-nss>. Use this option if you -must to be sure that older browsers (roughly speaking, versions prior to -4.0 of Netscape Navigator and Internet Explorer) can display the -syntax-coloring of the html files. - -=item Controlling HTML properties - -Note: It is usually more convenient to accept the default properties -and then edit the stylesheet which is produced. However, this section -shows how to control the properties with flags to perltidy. - -Syntax colors may be changed from their default values by flags of the either -the long form, B<-html-color-xxxxxx=n>, or more conveniently the short form, -B<-hcx=n>, where B is one of the following words, and B is the -corresponding abbreviation: - - Token Type xxxxxx x - ---------- -------- -- - comment comment c - number numeric n - identifier identifier i - bareword, function bareword w - keyword keyword k - quite, pattern quote q - here doc text here-doc-text h - here doc target here-doc-target hh - punctuation punctuation pu - parentheses paren p - structural braces structure s - semicolon semicolon sc - colon colon co - comma comma cm - label label j - sub definition name subroutine m - pod text pod-text pd - -A default set of colors has been defined, but they may be changed by providing -values to any of the following parameters, where B is either a 6 digit -hex RGB color value or an ascii name for a color, such as 'red'. - -To illustrate, the following command will produce an html -file F with "aqua" keywords: - - perltidy -html -hck=00ffff somefile.pl - -and this should be equivalent for most browsers: - - perltidy -html -hck=aqua somefile.pl - -Perltidy merely writes any non-hex names that it sees in the html file. -The following 16 color names are defined in the HTML 3.2 standard: - - black => 000000, - silver => c0c0c0, - gray => 808080, - white => ffffff, - maroon => 800000, - red => ff0000, - purple => 800080, - fuchsia => ff00ff, - green => 008000, - lime => 00ff00, - olive => 808000, - yellow => ffff00 - navy => 000080, - blue => 0000ff, - teal => 008080, - aqua => 00ffff, - -Many more names are supported in specific browsers, but it is safest -to use the hex codes for other colors. Helpful color tables can be -located with an internet search for "HTML color tables". - -Besides color, two other character attributes may be set: bold, and italics. -To set a token type to use bold, use the flag -B<--html-bold-xxxxxx> or B<-hbx>, where B or B are the long -or short names from the above table. Conversely, to set a token type to -NOT use bold, use B<--nohtml-bold-xxxxxx> or B<-nhbx>. - -Likewise, to set a token type to use an italic font, use the flag -B<--html-italic-xxxxxx> or B<-hix>, where again B or B are the -long or short names from the above table. And to set a token type to -NOT use italics, use B<--nohtml-italic-xxxxxx> or B<-nhix>. - -For example, to use bold braces and lime color, non-bold, italics keywords the -following command would be used: - - perltidy -html -hbs -hck=00FF00 -nhbk -hik somefile.pl - -The background color can be specified with B<--html-color-background=n>, -or B<-hcbg=n> for short, where n is a 6 character hex RGB value. The -default color of text is the value given to B, which is -black as a default. - -Here are some notes and hints: - -1. If you find a preferred set of these parameters, you may want -to create a F<.perltidyrc> file containing them. See the perltidy man -page for an explanation. - -2. Rather than specifying values for these parameters, it is probably -easier to accept the defaults and then edit a style sheet. The style -sheet contains comments which should make this easy. - -3. The syntax-colored html files can be very large, so it may be best to -split large files into smaller pieces to improve download times. - -=back - -=head1 SOME COMMON INPUT CONVENTIONS - -=head2 Specifying Block Types - -Several parameters which refer to code block types may be customized by also -specifying an associated list of block types. The type of a block is the name -of the keyword which introduces that block, such as B, B, or B. -An exception is a labeled block, which has no keyword, and should be specified -with just a colon. - -For example, the following parameter specifies C, labels, C, and -C blocks: - - -cscl="sub : BEGIN END" - -(the meaning of the -cscl parameter is described above.) Note that -quotes are required around the list of block types because of the -spaces. - -=head2 Specifying File Extensions - -Several parameters allow default file extensions to be overridden. For -example, a backup file extension may be specified with B<-bext=ext>, -where B is some new extension. In order to provides the user some -flexibility, the following convention is used in all cases to decide if -a leading '.' should be used. If the extension C begins with -C, C, or C<0-9>, then it will be appended to the filename with -an intermediate '.' (or perhaps an '_' on VMS systems). Otherwise, it -will be appended directly. - -For example, suppose the file is F. For C<-bext=old>, a '.' is -added to give F. For C<-bext=.old>, no additional '.' is -added, so again the backup file is F. For C<-bext=~>, then no -dot is added, and the backup file will be F . - -=head1 SWITCHES WHICH MAY BE NEGATED - -The following list shows all short parameter names which allow a prefix -'n' to produce the negated form: - - D anl asc aws b bbb bbc bbs bl bli boc bok bol bot ce - csc dac dbc dcsc ddf dln dnl dop dp dpro dsc dsm dsn dtt dwls - dwrs dws f fll frm fs hsc html ibc icb icp iob isbc lal log - lp lsl ohbr okw ola oll opr opt osbr otr ple ple pod pvl q - sbc sbl schb scp scsb sct se sfp sfs skp sob sohb sop sosb sot - ssc st sts syn t tac tbc toc tp tqw tsc w x bar kis - -Equivalently, the prefix 'no' or 'no-' on the corresponding long names may be -used. - -=head1 LIMITATIONS - -=over 4 - -=item Parsing Limitations - -Perltidy should work properly on most perl scripts. It does a lot of -self-checking, but still, it is possible that an error could be -introduced and go undetected. Therefore, it is essential to make -careful backups and to test reformatted scripts. - -The main current limitation is that perltidy does not scan modules -included with 'use' statements. This makes it necessary to guess the -context of any bare words introduced by such modules. Perltidy has good -guessing algorithms, but they are not infallible. When it must guess, -it leaves a message in the log file. - -If you encounter a bug, please report it. - -=item What perltidy does not parse and format - -Perltidy indents but does not reformat comments and C quotes. -Perltidy does not in any way modify the contents of here documents or -quoted text, even if they contain source code. (You could, however, -reformat them separately). Perltidy does not format 'format' sections -in any way. And, of course, it does not modify pod documents. - -=back - -=head1 FILES - -=over 4 - -=item Temporary files - -Under the -html option with the default --pod2html flag, a temporary file is -required to pass text to Pod::Html. Unix systems will try to use the POSIX -tmpnam() function. Otherwise the file F will be temporarily -created in the current working directory. - -=item Special files when standard input is used - -When standard input is used, the log file, if saved, is F, -and any errors are written to F unless the B<-se> flag is -set. These are saved in the current working directory. - -=item Files overwritten - -The following file extensions are used by perltidy, and files with these -extensions may be overwritten or deleted: F<.ERR>, F<.LOG>, F<.TEE>, -and/or F<.tdy>, F<.html>, and F<.bak>, depending on the run type and -settings. - -=item Files extensions limitations - -Perltidy does not operate on files for which the run could produce a file with -a duplicated file extension. These extensions include F<.LOG>, F<.ERR>, -F<.TEE>, and perhaps F<.tdy> and F<.bak>, depending on the run type. The -purpose of this rule is to prevent generating confusing filenames such as -F. - -=back - -=head1 SEE ALSO - -perlstyle(1), Perl::Tidy(3) - -=head1 VERSION - -This man page documents perltidy version 20071205. - -=head1 CREDITS - -Michael Cartmell supplied code for adaptation to VMS and helped with -v-strings. - -Yves Orton supplied code for adaptation to the various versions -of Windows. - -Axel Rose supplied a patch for MacPerl. - -Hugh S. Myers designed and implemented the initial Perl::Tidy module interface. - -Many others have supplied key ideas, suggestions, and bug reports; -see the CHANGES file. - -=head1 AUTHOR - - Steve Hancock - email: perltidy at users.sourceforge.net - http://perltidy.sourceforge.net - -=head1 COPYRIGHT - -Copyright (c) 2000-2006 by Steve Hancock - -=head1 LICENSE - -This package is free software; you can redistribute it and/or modify it -under the terms of the "GNU General Public License". - -Please refer to the file "COPYING" for details. - -=head1 DISCLAIMER - -This package is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -See the "GNU General Public License" for more details.