Pages

Thursday, March 25, 2010

Remove Unicode Characters Using PHP

/**
 * Replace Microsoft "smart" punctuation with plain ASCII equivalents.
 *
 * Curly single quotes become ', curly double quotes become ", the en dash
 * becomes -, the em dash becomes -- and the ellipsis becomes "...".
 * The UTF-8 encoded forms are always replaced. The single-byte Windows-1252
 * forms are replaced only when the text is not valid UTF-8, because those
 * byte values are also legitimate parts of UTF-8 multi-byte characters.
 *
 * @param string $string Text that may contain smart punctuation.
 * @return string The text with smart punctuation replaced by ASCII.
 */
function fix_ms_smart_quotes($string) {
    // Shared replacement list; order matches both search lists below.
    $ascii = array("'", "'", '"', '"', '-', '--', '...');

    // First, replace the UTF-8 encoded characters
    // (U+2018, U+2019, U+201C, U+201D, U+2013, U+2014, U+2026).
    $text = str_replace(
        array(
            "\xe2\x80\x98",
            "\xe2\x80\x99",
            "\xe2\x80\x9c",
            "\xe2\x80\x9d",
            "\xe2\x80\x93",
            "\xe2\x80\x94",
            "\xe2\x80\xa6",
        ),
        $ascii,
        $string
    );

    // Bytes 0x85 and 0x91-0x97 also appear as continuation bytes inside valid
    // UTF-8 sequences (for example "Å" is 0xC3 0x85), so replacing them in
    // well-formed UTF-8 would corrupt unrelated characters. An empty pattern
    // with the /u modifier matches only when the subject is valid UTF-8.
    // Non-string subjects (str_replace() also accepts arrays) skip this check
    // and fall through to the byte-wise pass.
    if (is_string($text) && preg_match('//u', $text) === 1) {
        return $text;
    }

    // Next, replace their Windows-1252 equivalents
    // (bytes 145, 146, 147, 148, 150, 151 and 133).
    return str_replace(
        array("\x91", "\x92", "\x93", "\x94", "\x96", "\x97", "\x85"),
        $ascii,
        $text
    );
}


/**
 * Decode HTML entities in a string and normalise its punctuation.
 *
 * Despite the name, this function does not strip markup: it decodes HTML
 * entities (three passes in total), converts smart quotes, dashes and
 * ellipses to ASCII via fix_ms_smart_quotes(), and finally removes
 * backslash escaping with stripslashes().
 *
 * NOTE(review): the first pass previously went through mbstring's
 * 'HTML-ENTITIES' pseudo-encoding, which may also have transcoded raw
 * non-ASCII bytes. This version leaves such bytes untouched and assumes the
 * input is already UTF-8 -- confirm against callers.
 *
 * @param string $content Text containing HTML entities.
 * @return string The decoded, normalised text.
 */
function removeHTMLTags($content) {
    // Flags and charset are pinned explicitly because the defaults of the
    // decode functions differ between PHP versions.
    $flags = ENT_QUOTES | ENT_HTML5;

    // html_entity_decode() replaces the 'HTML-ENTITIES' mbstring encoding,
    // which is deprecated as of PHP 8.2.
    $content = html_entity_decode($content, $flags, 'UTF-8');
    $content = fix_ms_smart_quotes($content);

    // Two further decoding passes unwind doubly-escaped input such as
    // "&amp;amp;lt;", which ends up as "<".
    $content = htmlspecialchars_decode($content, $flags);
    $content = html_entity_decode($content, $flags, 'UTF-8');

    // Remove backslash quoting (e.g. \' becomes ').
    $content = stripslashes($content);

    return $content;
}

No comments:

Post a Comment