I rewrote this. The original had an issue: it created a new URL <loc> entry for each image file, which didn't match the spec:
https://support.google.com/webmasters/answer/178636?hl=en
As a result, Google was reporting 40k images and 40k URLs in the sitemap. I'm not sure whether I broke something or whether this happens to everyone.
This works perfectly with X-Cart 4.1.12 with CDSEO and should work with newer versions.
It only handles detailed images, and it splits the output into multiple files if there are more than 40k images.
Image file names are written as HTTPS URLs. Note that this code only works with CDSEO URLs, but you can change the part that gets the product URL pretty easily.
Code:
require "./auth.php";
// Builds Google image sitemap file(s) for all for-sale products that have a
// CDSEO clean URL. Each product becomes ONE <url> entry whose <loc> is the
// product page, with one <image:image> child per detailed image.
// When the current file reaches $images_per_file images, output continues in
// a new file: images_sitemap.xml, images_sitemap2.xml, images_sitemap3.xml, ...
//================== Settings ==================
$sitemap_filename = "images_sitemap";
$products_limit = "1000000";
// Number of images after which a new sitemap file is started.
$images_per_file = 40000;
// File name suffix: '' for the first file, then 2, 3, ...
$sm='';
//$product_urls_extension = $config['SEO']['clean_urls_ext_p'];
//$geolocation = $config['Company']['location_address'].", ".$config['Company']['location_city'].", ".$config['Company']['location_state_name'].", ".$config['Company']['location_zipcode'].", ".$config['Company']['location_country_name'];
//$license_info_url = $https_location."/pages.php?pageid=3#images";
$geolocation = false; // false = no <image:geo_location>; comment this line out and enable the $geolocation line above to emit it
$license_info_url = false; // false = no <image:license>; comment this line out and enable the $license_info_url line above to emit it
//================== / Settings ================
// XML prolog and opening <urlset> tag, written at the top of every sitemap file.
$header = '<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">'."\n";
$images_sitemap = fopen($sitemap_filename.$sm.'.xml', "wb");
if ($images_sitemap === false) {
    exit("ERROR: cannot open ".$sitemap_filename.$sm.".xml for writing\n");
}
//-----------------------
$build = $header;
//-----------------------
$count = 0;        // images written to the CURRENT file
$total_images = 0; // images written across ALL files
$products = func_query("SELECT * FROM $sql_tbl[products] WHERE forsale='Y' ORDER BY productid DESC LIMIT $products_limit");
if (!is_array($products)) {
    // Guard: avoid iterating a non-array result (e.g. when no rows matched).
    $products = array();
}
foreach ($products as $product) {
    $pid = $product['productid'];
    //get URL!
    $cdseo_url = func_query_first_cell("SELECT cdseoUrl FROM wcm_cdseo WHERE cdseoReplaceID='$pid' AND cdseoType='product'",true);
    if ($cdseo_url == '') {
        continue; //if no clean url, skip the product
    }
    $clean_url = $cdseo_url.".html";

    // Split lazily, right before the next <url> is written, so a split that
    // falls on the very last product never leaves an empty trailing file.
    if ($count >= $images_per_file) {
        $build .= '</urlset>';
        fwrite($images_sitemap, $build);
        fclose($images_sitemap);
        $sm = ($sm === '') ? 2 : $sm + 1;
        $images_sitemap = fopen($sitemap_filename.$sm.'.xml', "wb");
        if ($images_sitemap === false) {
            exit("ERROR: cannot open ".$sitemap_filename.$sm.".xml for writing\n");
        }
        $build = $header;
        $count = 0;
    }

    // The page URL is escaped because <loc> is plain XML text (not CDATA).
    $build .= "\t<url>
<loc>".htmlspecialchars("$https_location/$clean_url")."</loc>\n";

    $query = "SELECT * FROM $sql_tbl[images_D] WHERE id='$pid'";
    $images = func_query($query);
    if (!is_array($images)) {
        // Product without detailed images: emit the <url> with no image children.
        $images = array();
    }
    foreach ($images as $result) {
        // image_path is stored relative to the shop root; drop any "./" and prepend the HTTPS base.
        $build .= "\t\t".'<image:image>
<image:loc><![CDATA['.$https_location."/".str_replace(array("./"),"",$result['image_path']).']]></image:loc>'."\r\n";
        if ($result['alt']) {
            // The alt text is used for both the caption and the title.
            $alt = sanitize_for_xml($result['alt']);
            $build .= "\t\t\t".'<image:caption>'.$alt.'</image:caption>
<image:title>'.$alt.'</image:title>'."\r\n";
        }
        if ($geolocation) {
            $build .= "\t".'<image:geo_location>'.htmlspecialchars($geolocation).'</image:geo_location>'."\r\n";
        }
        if ($license_info_url) {
            // Escaped so that "&" in a query string cannot break the XML.
            $build .= "\t".'<image:license>'.htmlspecialchars($license_info_url).'</image:license>'."\r\n";
        }
        $build .= "\t\t".'</image:image>';
        $count++;
        $total_images++;
    } //end foreach image
    $build .= "\n\t</url>\n";
} //end foreach product
$build .= '</urlset>';
//-----------------------
fwrite($images_sitemap, $build);
fclose($images_sitemap);
echo "DONE! images: $total_images\n";
exit;
/**
 * Makes a string safe to embed as text inside an XML element.
 *
 * Steps, in order:
 *  1. Round-trips the value through UTF-16 so that byte sequences which are
 *     not valid UTF-8 do not survive into the output.
 *  2. Replaces every character outside the ranges listed in the pattern
 *     below (tab, LF, CR, U+0020-U+D7FF, U+E000-U+FFFD, U+10000-U+10FFFF)
 *     with "?".
 *  3. Escapes XML-special characters via htmlspecialchars().
 *
 * @param string $v Raw text, expected to be UTF-8.
 * @return string Escaped text.
 */
function sanitize_for_xml($v) {
    // Step 1: UTF-8 -> UTF-16 -> UTF-8 round trip.
    $as_utf16 = mb_convert_encoding($v, 'UTF-16', 'UTF-8');
    $clean_utf8 = mb_convert_encoding($as_utf16, 'UTF-8', 'UTF-16');

    // Step 2: anything not matched by the allowed ranges becomes "?".
    $disallowed = '/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u';
    $xml_safe = preg_replace($disallowed, '?', $clean_utf8);

    // Step 3: entity-escape the markup characters.
    return htmlspecialchars($xml_safe);
}