Here is the revised version. This should take care of the JavaScript problem.
Code:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>HTML Catalog Cleaner</title>
</head>
<body>
<?php
######################################################
## ## HTML Catalog Cleaner ## ##
######################################################
## ##
## Strips every file in the HTML catalog directory ##
## of all excess white spaces. ##
######################################################
## Version: 1.1.0 (6/26/2004)
## Last updated: 6/27/2004
# Define the Constants #
########################
define('CATALOG_DIR', '/home/nulimec/public_html/catalog'); // Set the absolute directory path to your catalog.
define('BAR_LENGTH_REDUCER', 3); // If you have over 1000 HTML files in your catalog, you may wish to set this number higher.
# System constants: the time limits in force when the script starts, kept so they can be restored later.
define('MAX_ET', ini_get('max_execution_time'));
define('MAX_IT', ini_get('max_input_time'));
# Initialize variables.
$successes = 0; // Files cleaned and written back
$failures = 0; // Files that could not be processed
$filelength['init'] = 0; // Combined length before cleaning
$filelength['final'] = 0; // Combined length after cleaning
$cnt['tmp'] = 0; // Newline counter
$cnt['tot'] = 0; // Totals counter
$pblr = 0; // Progress bar length reducer variable
# Initialize regular expressions.
# The table maps replacement => pattern and is applied top to bottom, so every
# replacement string must be a UNIQUE key: a repeated key silently overwrites the
# earlier rule instead of adding a new one.
$regex = array(
    ''       => '/[\t\n\r\f]+/', // Newlines and tabs
    ' '      => '/ +/',          // Excess spaces
    '&nbsp;' => '/&nbsp; /i',    // Additional space after non-breaking space
    '><'     => '/> </',         // Space between HTML tags
);
# Captures a complete <script>...</script> element as group 1. The "s" modifier lets
# "." cross line breaks so multi-line scripts are captured whole; ".*?" stops at the
# first closing tag so neighbouring script elements stay separate matches.
$java_saver = '/(<SCRIPT\b[^>]*>.*?<\/SCRIPT>)/is';
# Function to Clean-up #
########################
/**
 * Shutdown hook.
 *
 * Writes the MAX_ET and MAX_IT constants back into the max_execution_time and
 * max_input_time directives, then switches implicit output flushing off.
 */
function script_shutdown() {
    $restore = array(
        'max_execution_time' => MAX_ET,
        'max_input_time'     => MAX_IT,
    );
    foreach ($restore as $directive => $value) {
        ini_set($directive, $value);
    }
    ob_implicit_flush(0); // From here on output is held in the buffer until flushed.
}
register_shutdown_function('script_shutdown'); // Have PHP call the hook when the script ends.
# Modify PHP Settings #
#######################
ini_set('max_execution_time', '14400'); // Make the maximum execution/input time 4 hours so that the script doesn't time-out.
ini_set('max_input_time', '14400');
# Keep compressed output from hiding the progress bar (Mozilla output problems).
# zlib.output_compression_level only accepts a numeric level, so ask for level 0
# rather than passing the non-numeric 'Off'.
ini_set('zlib.output_compression_level', '0');
# zlib.output_compression is the actual on/off switch; PHP refuses to change it once
# headers have gone out, so only attempt it while that is still possible.
if (!headers_sent())
    ini_set('zlib.output_compression', 'Off');
ob_implicit_flush(1); // Show the progress in the browser.
# Pad the response with 2049 tab characters (about 2 KB) so Internet Explorer shows
# output immediately. Sent as one string: with implicit flushing on, every echo is a flush.
echo str_repeat("\t", 8*256 + 1);
echo "Stripping the HTML files of excess spaces...
";
# Open the directory and store the file list.
if (!is_dir(CATALOG_DIR))
    die(CATALOG_DIR.' is not a directory! Please check the path and try again.');

$file_list = array(); // Defined up front so an empty or unreadable directory simply means "nothing to do".
if ($dh = opendir(CATALOG_DIR)) {
    # Iterate over file list.
    while (($filename = readdir($dh)) !== false) { // Use instead of scandir to skip some files.
        if (strpos($filename, '.htm') !== false)
            $file_list[] = $filename;
    }
    closedir($dh); // Close the directory.
}

# Perform specific operations on the files.
foreach ($file_list as $file) {
    $path = CATALOG_DIR.DIRECTORY_SEPARATOR.$file;
    $written = false; // Becomes true only once the cleaned text is completely on disk.

    $file_contents = file_get_contents($path);
    if ($file_contents !== false) { // An unreadable file is left untouched and reported as a failure.
        $filelength['init'] += strlen($file_contents);

        # Examine document for javascript code blocks and preserve them for restoration.
        $java_scripts = array(); // Reset for every file so fragments never leak from one file into the next.
        if (preg_match_all($java_saver, $file_contents, $got_java, PREG_SET_ORDER)) {
            foreach ($got_java as $java_chip)
                $java_scripts[] = $java_chip[1];
        }

        # Do each replacement on the whole document.
        foreach ($regex as $replace => $finds) {
            $cleaned = preg_replace($finds, $replace, $file_contents);
            if ($cleaned !== null) // null signals a PCRE error; keep the text rather than wipe the file.
                $file_contents = $cleaned;
        }

        # Reverse the damage to the javascripts.
        foreach ($java_scripts as $jscript) {
            # Determine what the stripped javascript block looks like by running ALL
            # rules over it in sequence, exactly as was done to the document.
            $stripped_java = $jscript;
            foreach ($regex as $replace => $finds) {
                $cleaned = preg_replace($finds, $replace, $stripped_java);
                if ($cleaned !== null)
                    $stripped_java = $cleaned;
            }
            # Find the stripped java and replace it with the original code.
            if ($stripped_java !== '' && $stripped_java !== $jscript)
                $file_contents = str_replace($stripped_java, $jscript, $file_contents);
        }

        $filelength['final'] += strlen($file_contents);

        # Truncate file, then apply the modifications. A write counts as successful only
        # when every byte was written, so an empty document is not misreported as a failure.
        if (is_writable($path) && ($fp = fopen($path, 'w')) !== false) {
            $written = (fwrite($fp, $file_contents) === strlen($file_contents));
            fclose($fp);
        }
    }

    if ($written) {
        $successes++;
    } else {
        $failure_list[] = $file; // Log failures.
        $failures++;
    }

    if ($pblr == BAR_LENGTH_REDUCER) { // Progress bar length reducer.
        echo '|'; // Lengthen the progress bar.
        $cnt['tmp']++; // Increment the newline counter.
        $pblr = 0; // Reset pblr counter.
    } else {
        $pblr++;
    }
    $cnt['tot']++; // Increment totals counter.
    if ($cnt['tmp'] == 300) { // Start a new row of bars and reset the counter.
        echo "\n";
        $cnt['tmp'] = 0;
    }
}
# Report the totals. Each paragraph is opened as well as closed so the page stays well-formed.
echo '
<p>There were '.number_format($successes).' successful cleanings and '.number_format($failures).' failures out of a total of '.number_format($cnt['tot']).' files.</p>';
echo '
<p>Your HTML Catalog files had a total combined length of '.number_format($filelength['init']).' characters.';
echo '<br />
They now have a total length of '.number_format($filelength['final']).' characters.</p>';
echo 'That is a total of <u>'.number_format($filelength['init']-$filelength['final']).'</u> excess white spaces removed from your files.
';
if (!empty($failure_list)) {
    echo '
The following files could not be written to:<br />
';
    $shade = true; // The first entry is shaded, then every other one.
    foreach ($failure_list as $fail) {
        # The braces matter: every failed file gets its own output line, not just the last one.
        $name = htmlspecialchars($fail, ENT_QUOTES); // File names come from disk; never echo them raw into HTML.
        # Show background color every other line for readability.
        if ($shade)
            echo '<font style="background-color:#E0E0E0">'.$name.'</font>';
        else
            echo $name;
        echo "<br />\n";
        $shade = !$shade;
    }
}
?>
</body>
</html>