#	Module  :	recurs
#	Author  :	H. Piske
#	Written :	07.12.2000
#	Purpose :	provide dos globbing that actually works :)

=explain

in non-Microsoft worlds, life is easy.
In Unix, there are no drive letters. 
In EPOC, foo is a file and foo\ is a directory.
Modern Windows combines all the disadvantages:
- a path can have a leading drive letter
- it might have a \\server notation instead
- there might be a file extension or not, so should * match a dot?
- there might be a dot that does not indicate an extension
- there is no case sensitivity
- a directory name may end in a backslash
- there may be slashes instead of backslashes
- x: means current dir of drive x to dos, but root of x in Perl

This module tries to deal with all of these odds. I wrote it out of
frustration over several other glob-modules that all failed at one
point or another. One crashed on files with single quotes, one
forgot directories with spaces, and so on.

Apart from the glob routine, there is recurs to glob into sub-dirs
and a couple of helpers.

sub glob($pattern);				# may include base path, e.g. c:\windows\*.dll
								# returns list of matching files with the base
								# path prepended (not just the file names)

sub recurs($pattern);			# may include base path
								# matches files in ALL subdirectories of base
				# can not deal with wild carded dir names
				# e.g. c:\win*\foo.dll must be done with
				  undef @_; push @_, glob "$_\\foo.dll" for glob 'c:\\win*';
				# at least if you don't know how long it takes to
				  grep /^c:\\win.*\\foo\.dll$/, recurs 'c:\*';

sub filematch($pattern, @list);	# grep for filenames. Awkward if path included
								# (read sub recurs on how it's done there)
								# returns list of matches

sub pathname(pathname);			# separate path from filename
								# returns ($path, $file)
								# $path is never empty (ie '.' for current dir)

sub pathcat(@components);		# put path components together again
								# (no, simple string concat won't do)
								# returns scalar

=cut

package recurs;

BEGIN
{
	use Exporter;
	use vars qw(@ISA @EXPORT);
	@ISA = qw(Exporter);
	@EXPORT = qw(filematch pathcat pathname abspath glob recurs);
}
return 1;

##############################################################################
#	grep for file names
#
sub filematch
{
	local $_ = shift;
	s/(.)\.\*$/$1/;		# chop trailing .* except if there's nothing else
	s/\./\\./g;			# escape dots
	s/\*/.*/g;			# dos wild card * is .* in rex
	s/\?/./g;			# dos wild card ? is .  in rex
	s/^$/.*/;			# nothing is all
	local $wild = $_;
	return grep /^$wild$/i		# now go and match
		&& !/^\.\.?$/, @_		# exclude '.' and '..' entries
		unless /\\\.$/;			# search pattern ending in dot?
	$wild =~ s/..$//;			# y: rats, that's dos special
	grep /^$wild$/i				# match the name without the dot
		&& !/\.\S{1,3}$/		# but there should be no standard extension
		&& !/^\.\.?$/, @_;		# e.g. more than 3 chars is not an ext
}								# I know, this fails on 'foo.jpeg'

##############################################################################
#	glue parts of a path together to make a file reference
#
sub pathcat
{
	local $_ = join '=', @_;	# '=' should not be in it
	y!/\t\r\n!\\!d;				# turn / into \\ and kill tabs and line endings
	s/^=*//;					# skip leading ...
	s/=*$//;					# ... and trailing ...
	while (s/==/=/) {};			# ... and intermittent empty components
	s/:=/:/ unless /:=\\/;		# 'x:' means current dir of x:, not 'x:\'
	s/^\\\\/>/;					# and leading \\ is network reference
	y/=/\\/;					# apart from that, make all delimiters \
	while (s/\\\\/\\/) {};		# turn \\ into \ until there are no more \\
	while (s/\\\.\\/\\/) {};	# make all \.\ simple \
	s/^>\\?/\\\\/;				# after that, network refs are OK again
	s/(:\\|\\)\.$/$1/;			# drop trailing \. (make it \, chop later)
	s/^(.:)?\.\\/$1/;			# drop leading .\ (x:.\foo is simply x:foo)
	chop if /[^:]\\$/;			# chop trailing \ unless it's x:\ or just \
	s/:$/:./;					# make sure trailing : means x:. and not x:\
	return $_ || '.';			# finally, nothing means current dir
}

##############################################################################
#	split a file reference into directory reference and file name therein
#
sub pathname
{
	local $_ = shift;
#print;
	local $name = '';
	s!/!\\!g;
	if (-d)								# if target is dir, there is no file name
	{
		chop if /[^:]\\$/;				# just strip trailing \
	} else {
		/^(.:\\?)(\.$)?([^\\]*)$/		# leading drive letter (f:xxx or f:\xxx)
										# special case f:. reads as f:
	||	/(.*)(\\)([^\\]*)$/				# name is everything behind last backslash
	||	/()()(.*)/;						# no path if neither : nor \\
#print "  \$1=$1  \$2=$2  \$3=$3\n";
		$name = $3;
		$_ = $1 || $2 || '.';			# \\ if leading \\, current dir if no path
	}
	s/:$/:./;							# use f:. for f: to avoid getting f:\
#print "  dir=$_  name=$name\n";
	return ($_, $name);
}

##############################################################################
#	get a nice, solid, absolute path
#
sub abspath
{
	local $_ = shift;
	s!/!\\!;
	$_ = substr (`cd`, 0, 2) . $_ unless /^(.:|\\\\)/;	# prepend drive if missing
	/^(.:)?(.*)$/;
	&pathcat ($2 =~ /^\\/ ? $_ : `cd $1`, $2);
	# this seems like a wild construct:
	# only if the match fails, are $1 and $2 still from the previous line.
	# if the match is true, $2 is empty and $_ is fed pathcat as first argument.
	# running pathcat even with $_ alone is intentional to have it strip
	# .\.\ or trailing \ and the like.
}

##############################################################################
#	get all matches from current or specified directory
#
sub glob
{
	local ($dir, $wild) = &pathname (@_);
	opendir DIR, $dir or return ();
	local @glob = &filematch ($wild, readdir DIR);
	closedir DIR;
	return @glob if $dir eq '.';			# current dir: just return files
	$dir =~ s/:\.$/:/;						# turn x:. back into x:
	$dir .= '\\' unless $dir =~ /[\\:]$/;	# append \ if none there
	map {$_ = $dir . $_} @glob;				# prepend dir name to each file
}

##############################################################################
#	get matches recursively
#
sub recurs
{
	local ($base, $wild, $_, @glob, @recurs);
	($base, $wild) = @_;
	unless ($#_)					# one param: that's the search pattern,
	{								# possibly following the starting path
		($base, $_) = &pathname ($base);
		$base =~ s/\\$//;
		s/^$/\*/;					# no search pattern: find everything (*)
		s!\*![^\\\\/]{0,}!g;		# * must not match \ and '*' char has to go
		s!\?![^\\\\/]{0,1}!g;		# ? must not match \ and '?' char has to go
		$wild = "(*\\\\){0,1}$_";	# match after last \ only (avoid '?' char)
	}								# '*' will be replaced by '.*' in filematch
	@glob = sort {uc($a) cmp uc($b)} &glob ("$base\\*");	# do the glob
	@recurs = ();					# match using * (need to get all dirs)
	for (@glob)			# directories first
	{					# two-parameters-call avoids another wildcard conversion
		push @recurs, &recurs ($_, $wild) if -d;
	}			# now add the files (this is where the actual match is done)
	push @recurs, grep -f, &filematch ($wild, @glob);
	return @recurs;
}

