# Debug verbosity for this rule. Change this to control debug messages:
# 0 -> debug off
# 1 -> only log what happens when mappings are reloaded
# 2 -> also log what goes on in each request
$debug = 0;
# The file that we parse is sourced from the "Extra Files" section
# of the Traffic Manager and we expect it to be named the same as
# the TrafficScript rule (so if your rule is named MyRewrite, the rule will
# expect a file called "MyRewrite" in the "extra" directory).
# The file we read has 3 elements that are space separated:
# Element one is the string RD or RW, depending on whether we are
# redirecting the connection or rewriting it
# Element two is the old URL
# Element three is the new URL
# example:
# RD /oldurl http://newsite.com/newurl
# We cannot store the hash-table of mappings in data.get/set because
# that is inefficient due to the constant (de-)serialization.
# Instead, we employ the following 'flattening' strategy: all direct
# mappings (no wildcard or regex) are stored in data.get/set as is.
# Wildcards and regexes are sorted by how specific they are (most
# specific first) and then stored under the key $file . $idx, where
# $idx is their position in the pecking order. When we do a lookup,
# we check the direct mappings first. If we don't find anything, we
# go through the wc/regex keys starting with index 0, counting up.
# As long as we keep finding the key, we check for a match. When we
# fail to find the key, we know we've checked all entries and give
# up.
sub sortAndInsertMappings( $prefix,$mappings,$debug ) {
   # Stores the wildcard/regex mappings under numbered keys:
   #   $prefix   - key prefix; entries are written to $prefix . 0, $prefix . 1, ...
   #   $mappings - hash keyed by <2-byte slash count><path>, value is the new URL
   #   $debug    - when true, each stored entry is logged
   # The second argument to array.sort requests reverse order, so the keys
   # with the largest slash count come first.
   $ordered = array.sort( hash.keys( $mappings ), 1 );
   $position = 0;
   foreach( $sortKey in $ordered ) {
      $slot = $prefix . $position;
      # Drop the 2-byte slash count from the front; only the path is stored.
      $entry = string.skip( $sortKey, 2 ) . " " . $mappings[$sortKey];
      if( $debug ) {
         log.info( "Mapping from " . $slot . " to '" . $entry . "'" );
      }
      data.set( $slot, $entry );
      $position = $position + 1;
   }
}
sub reloadRedirects( $file, $fileTime,$debug ) {
   # Re-reads the mapping file and rebuilds every data.set entry for it.
   #   $file     - name of the extra file; also the prefix of every key we store
   #   $fileTime - modification time recorded under $file . "-MTIME" once the
   #               reload has finished
   #   $debug    - when true, each stored mapping is logged
   # Direct mappings are stored under $file . <RD|RW> . <old URL>; wildcard and
   # regex mappings are collected per prefix and handed to
   # sortAndInsertMappings, which stores them under numbered keys.
   $nonflat_rd = []; # empty hash
   $nonflat_rw = [];
   # data.reset is really expensive, but could be avoided by actually
   # storing an array of 'our' keys under a dedicated entry. That
   # would only be worth it if the number of elements in data that
   # are not ours is significant, since then the deserialization
   # followed by an iteration over the relevant elements would
   # be cheaper than the full scan.
   data.reset( $file );
   $lines = resource.getlines( $file );
   foreach ( $line in $lines ) {
      $data = string.split( $line );
      if( 3 != array.length( $data ) ) {
         # Log the raw line so the offending entry can be found in the file.
         log.warn( "Invalid line: " . $line );
         continue;
      }
      $prefix = $data[0];
      $path = $data[1];
      $mapping = $data[2];
      # Reject unknown prefixes for every kind of mapping, not only for
      # wildcard/regex ones; lookups only ever ask for RD or RW.
      if( $prefix != "RD" && $prefix != "RW" ) {
         log.warn( "Invalid prefix: " . $prefix );
         continue;
      }
      if( !string.contains( $mapping, "$1" ) && !string.contains( $path, "*" ) ) {
         # simple mapping
         $k = $file . $prefix . $path;
         if( $debug ) { log.info( "Direct mapping from " . $k . " to " . $mapping ); }
         data.set( $k, $mapping );
      } else {
         # wc or regex
         # Create a 2-byte binary representation of the number in network
         # byte order. This means we can use alphabetical sorting and still
         # end up with an array sorted numerically
         $num_slashes = string.intToBytes( string.count( $path, "/" ), 2 );
         if( $prefix == "RD" ) {
            $nonflat_rd[ $num_slashes . $path ] = $mapping;
         } else {
            $nonflat_rw[ $num_slashes . $path ] = $mapping;
         }
      }
   }
   sortAndInsertMappings( $file . "RD", $nonflat_rd, $debug );
   sortAndInsertMappings( $file . "RW", $nonflat_rw, $debug );
   # Written last: a matching MTIME tells other processes the reload is done.
   data.set( $file . "-MTIME", $fileTime );
}
sub checkPaths( $prefix, $path, $debug ) {
   # Looks up the replacement for $path among the stored mappings.
   #   $prefix - "RD" or "RW", selecting which set of mappings to search
   #   $path   - the request path to look up
   #   $debug  - per-request logging happens when this is greater than 1
   # Returns the new path/URL, or "" when nothing matches.
   # Direct mappings are tried first; after that the numbered wildcard/regex
   # entries are walked from index 0 until a key is missing.
   $base = rule.getname() . $prefix;
   $mapping = data.get( $base . $path );
   if ( string.length( $mapping ) ) {
      if( $debug > 1 ) { log.info( "Straight swap from " . $path . " to " . $mapping ); }
      return $mapping;
   }
   for( $i = 0; 1; ++$i ) {
      $k = $base . $i;
      $data = data.get( $k );
      if( $debug > 1 ) { log.info( "Checking key " . $k ); }
      if( 0 == string.length( $data ) ) {
         # First missing index marks the end of the stored entries.
         if( $debug > 1 ) {
            log.info( "No more entries, no match found after " . $i . " entries" );
         }
         return "";
      }
      if( $debug > 1 ) { log.info( "Found data " . $data ); }
      $arr = string.split( $data );
      if( 2 != array.length( $arr ) ) {
         log.warn( "Invalid data entry: " . $data );
         continue;
      }
      $match = $arr[0];
      $mapping = $arr[1];
      if( string.contains( $mapping, "$1" ) ) {
         # User needs a regex match /foo/(.*) /bar/$1
         if( string.regexmatch( $path, $match ) ) {
            if( $debug > 1 ) { log.info( "Regex matched" ); }
            return string.regexsub( $path, $match, $mapping );
         }
      } else if( string.endswith( $match, "*" ) ) {
         # Redirect "/foo/*" to /bar
         $p = string.drop( $match, 1 ); # pattern without the trailing "*"
         if( string.startswith( $path, $p ) ) {
            if( $debug > 1 ) { log.info( "Wildcard matched" ); }
            # Swap only the leading prefix; a plain string.replace would
            # also rewrite later occurrences of $p inside the path.
            return $mapping . string.skip( $path, string.length( $p ) );
         }
      }
   }
   return "";
}
# Per-request entry point: make sure the stored mappings are current, then
# try a redirect (RD) mapping first and a rewrite (RW) mapping second.
# $start_time is only taken, and only read, when $debug > 1.
if( $debug > 1 ) { $start_time = sys.time.highres(); }
checkMappingsUpToDate();
$path = http.getPath();
$rdPath = checkPaths( "RD", $path, $debug );
if ( string.length( $rdPath ) ) {
   if( $debug > 1 ) {
      log.info( "Redirecting: " . $path . " to " . $rdPath . "; Elapsed: "
                . (sys.time.highres() - $start_time) );
   }
   # NOTE(review): http.redirect is expected to end rule processing here - confirm.
   http.redirect( $rdPath );
}
$rwPath = checkPaths( "RW", $path, $debug );
if ( string.length( $rwPath ) ) {
   if( $debug > 1 ) {
      log.info( "Rewriting: " . $path . " to " . $rwPath . "; Elapsed: "
                . (sys.time.highres() - $start_time) );
   }
   http.setPath( $rwPath );
} else if( $debug > 1 ) {
   # Only report "no match" when no rewrite was applied either.
   log.info( "No match, request goes through unchanged; Elapsed: "
             . (sys.time.highres() - $start_time) );
}
# Since we store the mappings as multiple values, when the file has
# changed, we have to delete all mappings from the file and then step
# by step populate the map again with the new mappings. This means
# that while we're re-populating, a lookup for a particular value might
# incorrectly find neither the old nor the new value. TrafficScript doesn't
# have real locks that guarantee access by only one process. We can
# emulate them closely, however, by guarding write access to the
# mappings with a single 0/1 entry.
sub checkMappingsUpToDate() {
   # Reloads the mappings when the extra file named after this rule has a
   # modification time different from the one recorded at the last reload.
   # A 0/1 data entry acts as a best-effort lock: the process that finds it
   # unset reloads, the others sleep and re-check until the recorded
   # modification time matches the file. Takes no arguments, returns nothing.
   $pid = sys.getpid();
   # Per-process counter of how often this process had to wait for a reload.
   if( !string.length( data.get( $pid ) ) ) { data.set( $pid, 0 ); }
   $file = rule.getname();
   if ( resource.exists( $file ) ) {
      # we could use 'mtime' as the 'lock' key as well by setting it to
      # a 'magic' value to indicate we're updating
      # The lock key must not start with $file: reloadRedirects calls
      # data.reset( $file ), which would delete the lock in mid-reload.
      $lock_key = "LOCK-" . $file;
      $mod_key = $file . "-MTIME";
      while( 1 ) {
         $mtime = data.get( $mod_key );
         $fileTime = resource.getMTime( $file );
         if ( $mtime == $fileTime ) {
            break;
         }
         if ( !data.get( $lock_key ) ) {
            data.set( $lock_key, "1" ); # 'lock'
            log.info( $pid . " parsing file" );
            # NOTE(review): $debug is not a parameter of this sub; if
            # subroutines cannot see rule-level variables, an empty value is
            # passed on here and reload logging stays off - confirm.
            reloadRedirects( $file, $fileTime, $debug );
            data.remove( $lock_key ); # 'unlock'
            break;
         } else {
            # wait for the other process to reload the file
            $waits = data.get( $pid );
            data.set( $pid, $waits+1 );
            connection.sleep( 2 );
         }
      }
   }
}
This TrafficScript library (libTable.rts: Interrogating tables of data in TrafficScript) provides a way to query and iterate through tables of data.
You could implement the above solution more simply by maintaining several tables of data (separate tables for RD vs. RW, and for exact matches vs. wildcard matches) and then searching each table with the URL.