View Single Post
  #16  
Old 06-30-2004, 07:22 PM
  adpboss's Avatar 
adpboss adpboss is offline
 

X-Man
  
Join Date: Feb 2003
Location: Ontario, Canada
Posts: 2,389
 

Default

Works with my java pop stuff. It's relatively fast, and the script terminates properly with the report at the end.

This includes all of the bug fixes and updates up to the time of this post.

Using version 3.4.14.

GREAT JOB NUALPHA!

Code:
<?php
// Turn off zlib compression, if On, to prevent Mozilla output problems.
// This has to run before any output is sent.
ini_set('zlib.output_compression', 'Off');
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>HTML Catalog Cleaner</title>
</head>
<body>
<?php
######################################################
##                                                  ##
##              HTML Catalog Cleaner                ##
##                                                  ##
######################################################
##                                                  ##
## Strips every file in the HTML catalog directory  ##
## of all excess white spaces.                      ##
######################################################
## Version: 1.1.2 (6/26/2004)
## Last updated: 6/28/2004
##
## Flow: collect the *.htm / *.html files in CATALOG_DIR, squeeze the
## whitespace out of each one while leaving <script> blocks untouched,
## write the result back in place, and print a progress bar followed by
## a summary report.

# Define the Constants #
########################
define('CATALOG_DIR', '/home/your-site-dir/path-to-xcart/catalog'); // Set the absolute directory path to your catalog.
define('BAR_LENGTH_REDUCER', 3); // If you have over 1000 HTML files in your catalog, you may wish to set this number higher.

# System constants (the values in force before this script changes them).
define('MAX_ET', ini_get('max_execution_time'));
define('MAX_IT', ini_get('max_input_time'));

# Modify PHP Settings #
#######################
// Make the maximum execution & input time 4 hours so that the script doesn't time-out.
ini_set('max_execution_time', '14400');
// NOTE(review): the PHP manual lists max_input_time as PHP_INI_PERDIR, so this
// call is probably a no-op at runtime; it is kept for parity with the original.
ini_set('max_input_time', '14400');
ob_implicit_flush(1); // Show the progress in the browser.

# Initialize variables.
$successes    = 0;
$failures     = 0;
$filelength   = array('init' => 0, 'final' => 0); // Combined byte length before / after cleaning
$cnt          = array('tmp' => 0,                 // Newline counter (bars printed on the current row)
                      'tot' => 0);                // Totals counter (files processed)
$pblr         = 0;                                // Progress bar length reducer variable
$file_list    = array();                          // Always defined, even when the directory is empty
$failure_list = array();                          // Names of files that could not be cleaned

# Initialize regular expressions.
# Each rule is array(pattern, replacement) and the rules run in this order.
# A list is used rather than a replacement => pattern map because two rules
# sharing a replacement would silently overwrite each other in a map.
# NOTE(review): the first rule deletes line breaks outright, so markup that
# relies on a newline as the only separator (e.g. "<a\nhref=...>" or two words
# split across lines) gets joined together. Replacing with a single space
# would be safer; left as-is pending confirmation of the intended behavior.
$regex = array(
    array('/[\t\n\r\f]+/', ''),    // Newlines and tabs
    array('/ +/', ' '),            // Excess spaces
    array('/&nbsp; /i', '&nbsp;'), // Additional space after non-breaking space
    array('/> </', '><'),          // Space between HTML tags
);
$java_saver = '/(<script[^>]*>.*?<\/script>)/si';

# Function to Clean-up #
########################
/**
 * Shutdown hook: puts the time limits and implicit flushing back the way
 * they were when the script started.
 */
function script_shutdown()
{
    ini_set('max_execution_time', MAX_ET); // Reset the maximum execution time.
    ini_set('max_input_time', MAX_IT);     // Reset the maximum input time.
    ob_implicit_flush(0);                  // Data should be kept in the buffer until ready.
}
register_shutdown_function('script_shutdown'); // Register the shutdown function.

/**
 * Runs every whitespace rule, in order, over a chunk of markup.
 *
 * @param string $markup Markup to clean.
 * @param array  $rules  List of array(pattern, replacement) pairs.
 * @return string|false  The cleaned markup, or false when PCRE reports an error.
 */
function hcc_apply_rules($markup, $rules)
{
    foreach ($rules as $rule) {
        $markup = preg_replace($rule[0], $rule[1], $markup);
        if ($markup === null) {
            // preg_replace() yields null on failure; writing that back would empty the file.
            return false;
        }
    }
    return $markup;
}

/**
 * Cleans a whole document while keeping every <script> block byte-for-byte.
 *
 * Each script block is swapped for a unique, tag-shaped placeholder before
 * the rules run and swapped back afterwards. Because the placeholder begins
 * with "<" and ends with ">", whitespace between a script and its neighbouring
 * tags is still squeezed by the "> <" rule, and because every placeholder is
 * unique, two scripts that differ only in whitespace cannot be confused.
 *
 * @param string $markup         Full document contents.
 * @param array  $rules          List of array(pattern, replacement) pairs.
 * @param string $script_pattern Pattern whose single capture group spans one whole script block.
 * @return string|false          The cleaned document, or false when PCRE reports an error.
 */
function hcc_clean_markup($markup, $rules, $script_pattern)
{
    // With DELIM_CAPTURE and one capture group the result alternates
    // text, script, text, script, ..., text - scripts sit at the odd indexes.
    $parts = preg_split($script_pattern, $markup, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        return false;
    }

    $salt      = md5($markup); // Keeps the placeholders from colliding with real content.
    $originals = array();      // placeholder => original script block
    $masked    = '';
    foreach ($parts as $index => $part) {
        if ($index % 2 == 1) {
            $token = '<!--hcc:' . $salt . ':' . count($originals) . '-->';
            $originals[$token] = $part;
            $masked .= $token;
        } else {
            $masked .= $part;
        }
    }

    $cleaned = hcc_apply_rules($masked, $rules);
    if ($cleaned === false) {
        return false;
    }

    // strtr() with an empty map returns false before PHP 8, hence the guard.
    return count($originals) > 0 ? strtr($cleaned, $originals) : $cleaned;
}

# Pad with 256 bytes for Internet Explorer to show output immediately.
if (isset($_SERVER['HTTP_USER_AGENT']) && strpos($_SERVER['HTTP_USER_AGENT'], 'MSIE') !== false) {
    echo str_repeat("\t", 256);
}
echo "\n";

echo "Stripping the HTML files of excess spaces... ";

# Open the directory and store the file list.
if (!is_dir(CATALOG_DIR)) {
    die(CATALOG_DIR.' is not a directory! Please check the path and try again.');
}
$dh = opendir(CATALOG_DIR);
if ($dh === false) {
    die(CATALOG_DIR.' could not be opened! Please check its permissions and try again.');
}
# Iterate over file list. readdir() is used instead of scandir() so entries can be filtered as they are read.
while (($filename = readdir($dh)) !== false) {
    // Only regular files whose name ENDS in .htm / .html; this leaves out
    // directories and leftovers such as "page.html.bak".
    if (preg_match('/\.html?$/i', $filename) && is_file(CATALOG_DIR.DIRECTORY_SEPARATOR.$filename)) {
        $file_list[] = $filename;
    }
}
closedir($dh); // Close the directory.

# Perform specific operations on the files.
foreach ($file_list as $file) {
    $path    = CATALOG_DIR.DIRECTORY_SEPARATOR.$file;
    $cleaned = false; // Stays false until the file has been rewritten successfully.

    $file_contents = file_get_contents($path);
    if ($file_contents !== false) {
        $initial_length = strlen($file_contents);
        $filelength['init'] += $initial_length;

        $result = hcc_clean_markup($file_contents, $regex, $java_saver);
        if ($result !== false) {
            $fp = fopen($path, 'w'); // Truncate file, then apply the modifications.
            if ($fp !== false) {
                // Compare byte counts: an empty file legitimately writes 0 bytes,
                // and a short write must not be reported as a success.
                $cleaned = (fwrite($fp, $result) === strlen($result));
                fclose($fp);
            }
        }

        // Failed files are counted at their original length so the savings
        // figure only reflects files that were actually rewritten.
        $filelength['final'] += $cleaned ? strlen($result) : $initial_length;
    }

    if ($cleaned) {
        $successes++;
    } else {
        $failure_list[] = $file; // Log failures.
        $failures++;
    }

    # Progress bar: one bar per (BAR_LENGTH_REDUCER + 1) files.
    if ($pblr >= BAR_LENGTH_REDUCER) {
        echo '|';      // Lengthen the progress bar.
        $cnt['tmp']++; // Increment the newline counter.
        $pblr = 0;     // Reset pblr counter.
    } else {
        $pblr++;
    }
    $cnt['tot']++;     // Increment totals counter.

    if ($cnt['tmp'] >= 300) { // Start a new row of bars and reset the counter.
        echo '<br />';
        $cnt['tmp'] = 0;
    }
}

# Summary report.
echo '<p>There were '.number_format($successes).' successful cleanings and '.number_format($failures).' failures out of a total of '.number_format($cnt['tot']).' files.</p>';
echo '<p>Your HTML Catalog files had a total combined length of '.number_format($filelength['init']).' characters.';
echo ' They now have a total length of '.number_format($filelength['final']).' characters.</p>';
echo 'That is a total of <u>'.number_format($filelength['init'] - $filelength['final']).'</u> excess white spaces removed from your files. ';

if (count($failure_list) > 0) {
    echo ' The following files could not be cleaned: ';
    foreach ($failure_list as $position => $fail) {
        # Show background color every other line for readability (first line shaded).
        if ($position % 2 == 0) {
            $bgb = '<font style="background-color:#E0E0E0">';
            $bge = '</font>';
        } else {
            $bgb = '';
            $bge = '';
        }
        // The charset matches the page's meta tag so names with high bytes are not dropped.
        echo '<br />'.$bgb.htmlspecialchars($fail, ENT_QUOTES, 'ISO-8859-1').$bge.' ';
    }
}
?>
</body>
</html>
Reply With Quote