View Single Post
  #9  
Old 04-03-2017, 11:15 PM
 
mattstyle2 mattstyle2 is offline
 

Advanced Member
  
Join Date: Apr 2015
Posts: 54
 

Default Re: X-Cart Product Images Sitemap Generator

I rewrote this. The original script created a new URL <loc> entry for each image file, which didn't match the spec:
https://support.google.com/webmasters/answer/178636?hl=en


So Google was reporting 40k images and 40k URLs in the sitemap. I'm not sure whether I broke something or whether it happens to everyone.

This works perfectly with X-Cart 4.1.12 with CDSEO and should work with newer versions.

This only handles detailed images, and it splits the output into multiple files if there are more than 40k images.

It uses HTTPS URLs for the files. Note that this code only works with CDSEO URLs, though you can change the part that gets the URL pretty easily.


Code:
// Generates Google image sitemap file(s) for the detailed images of every
// product that is for sale and has a CDSEO clean URL. All images of a product
// are grouped under that product's single <url>/<loc> entry. When the current
// file has accumulated at least $images_per_file images, it is closed and a
// new file with a numeric suffix (images_sitemap2.xml, images_sitemap3.xml,
// ...) is started.
//
// Relies on names provided by the required auth.php environment:
// $sql_tbl, $https_location, func_query(), func_query_first_cell().
require "./auth.php";

//================== Settings ==================
$sitemap_filename = "images_sitemap";
$products_limit = "1000000";
$images_per_file = 40000; // start a new sitemap file once this many images were written
$sm = '';                 // file-name suffix: '' for the first file, then 2, 3, ...
//$product_urls_extension = $config['SEO']['clean_urls_ext_p'];
//$geolocation = $config['Company']['location_address'].", ".$config['Company']['location_city'].", ".$config['Company']['location_state_name'].", ".$config['Company']['location_zipcode'].", ".$config['Company']['location_country_name'];
//$license_info_url = $https_location."/pages.php?pageid=3#images";
$geolocation = false;      // false = no geo_location element; use the commented line above to enable it
$license_info_url = false; // false = no license element; use the commented line above to enable it
//================== / Settings ================

// XML prologue shared by every sitemap file that gets written.
$sitemap_header = '<?xml version="1.0" encoding="UTF-8"?> '
    . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
    . 'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">' . "\n";

$images_sitemap = fopen($sitemap_filename . $sm . '.xml', "wb");
if ($images_sitemap === false) {
    echo "ERROR: cannot open " . $sitemap_filename . $sm . ".xml for writing\n";
    exit;
}

//-----------------------
$build = $sitemap_header;
//-----------------------

$count = 0;        // images written to the current file
$total_images = 0; // images written across all files

// Only productid is read below, so there is no need to fetch every column.
$products = func_query("SELECT productid FROM $sql_tbl[products] WHERE forsale='Y' ORDER BY productid DESC LIMIT $products_limit");
if (!is_array($products)) {
    // NOTE(review): guards against func_query() returning a non-array when
    // nothing matches -- confirm against the X-Cart implementation.
    $products = array();
}

foreach ($products as $product) {
    $pid = (int) $product['productid'];

    //get URL!
    $cdseo_url = func_query_first_cell("SELECT cdseoUrl FROM wcm_cdseo WHERE cdseoReplaceID='$pid' AND cdseoType='product'", true);
    if ($cdseo_url != '') {
        $clean_url = $cdseo_url . ".html";
    } else {
        continue; //if no clean url, skip the product
    }

    // The page URL is XML-escaped so that characters such as "&" cannot
    // produce an invalid document.
    $build .= "\t<url> <loc>" . htmlspecialchars("$https_location/$clean_url") . "</loc>\n";

    $images = func_query("SELECT * FROM $sql_tbl[images_D] WHERE id='$pid'");
    if (!is_array($images)) {
        $images = array();
    }

    foreach ($images as $result) {
        $build .= "\t\t" . '<image:image> <image:loc><![CDATA[' . $https_location . "/" . str_replace(array("./"), "", $result['image_path']) . ']]></image:loc>' . "\r\n";

        if (!empty($result['alt'])) {
            $build .= "\t\t\t" . '<image:caption>' . sanitize_for_xml($result['alt']) . '</image:caption> <image:title>' . sanitize_for_xml($result['alt']) . '</image:title>' . "\r\n";
        }

        if ($geolocation) {
            $build .= "\t" . '<image:geo_location>' . htmlspecialchars($geolocation) . '</image:geo_location>' . "\r\n";
        }

        if ($license_info_url) {
            $build .= "\t" . '<image:license>' . htmlspecialchars($license_info_url) . '</image:license>' . "\r\n";
        }

        $build .= "\t\t" . '</image:image>';

        // The posted code had "$count ;" here (increment operator lost), which
        // left the counter at 0 and meant the split below never happened.
        $count++;
        $total_images++;
    } //end foreach image

    $build .= "\n\t</url>\n";

    // The limit is checked only between products, so one product's images
    // never end up spread over two files.
    if ($count >= $images_per_file) { //split the file
        $build .= '</urlset>';
        fwrite($images_sitemap, $build);
        fclose($images_sitemap);

        // First continuation file gets suffix 2, later ones count upwards.
        if ($sm === '') {
            $sm = 2;
        } else {
            $sm++;
        }

        $images_sitemap = fopen($sitemap_filename . $sm . '.xml', "wb");
        if ($images_sitemap === false) {
            echo "ERROR: cannot open " . $sitemap_filename . $sm . ".xml for writing\n";
            exit;
        }

        $build = $sitemap_header;
        $count = 0;
    }
} //end foreach product

$build .= '</urlset>';

//-----------------------
fwrite($images_sitemap, $build);
fclose($images_sitemap);

// Report the total over all files; $count alone only covers the last file.
echo "DONE! \nimages: $total_images\n";
exit;

/**
 * Makes a string safe to embed as XML character data.
 *
 * @param string $v Raw text (treated as UTF-8).
 * @return string The text with characters outside the allowed XML ranges
 *                replaced by "?" and then passed through htmlspecialchars().
 */
function sanitize_for_xml($v)
{
    // Strip invalid UTF-8 byte sequences - this part may not be strictly necessary, could be separated to another function
    $v = mb_convert_encoding(mb_convert_encoding($v, 'UTF-16', 'UTF-8'), 'UTF-8', 'UTF-16');

    // Remove various characters not allowed in XML
    $v = preg_replace('/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '?', $v);

    return htmlspecialchars($v);
}
__________________
4.7.7.
php7.1, AWS RDS database
memcache, reboot theme AWS EC2 load balancer, 2 c5.large instances CDSEO Shop By Filters,
AC onepage checkout and checkout tools,
altercart cash rewards, bcse DPM for paypal
Reply With Quote