How can I replace some letters (accented variants) in order to sanitize selected words in a text?
Example:
// Words to mask in $str, whatever their case or accentuation.
$stopwords = ['toto', 'Pied', 'the'];
$str = 'bla bla toto bla bla Toto bla bla Tôtô bla bla totô bla bla bla bla the thé pied pïed pîéd';
// Base letter => accented variants that must be treated as the same letter.
$letters = [
    'a' => ['â', 'à'],
    'c' => ['ç'],
    'e' => ['é', 'ê', 'è', 'ë'],
    'i' => ['î', 'ï'],
    'o' => ['ô'],
    'u' => ['û', 'ù', 'ü'],
];

/**
 * Build a case-insensitive, UTF-8 aware regex that matches a stop word and
 * its accented spellings.
 *
 * The stop word is lower-cased and stripped of the accents listed in $letters;
 * every letter that has variants then becomes "(?:base|variant|...)\p{M}*" so
 * that both precomposed characters (ô) and decomposed ones (o + combining
 * mark) match. All other characters are inserted literally (regex-escaped).
 * The pattern has no word boundaries: it also matches inside longer words.
 *
 * @param string                      $stop_word UTF-8 word to match.
 * @param array<string, list<string>> $letters   Base letter => accented variants.
 *
 * @return string Delimited PCRE pattern using the "iu" modifiers.
 *
 * @throws \InvalidArgumentException When $stop_word is not valid UTF-8.
 */
function build_stopword_regex(string $stop_word, array $letters): string
{
    // Derive the accent-stripping tables from $letters so they cannot drift apart.
    $search = [];
    $replace = [];
    foreach ($letters as $base => $variants) {
        foreach ($variants as $variant) {
            $search[] = $variant;
            $replace[] = (string) $base;
        }
    }

    // strtolower() is byte-wise; prefer the multibyte version when available.
    $lowered = function_exists('mb_strtolower')
        ? mb_strtolower($stop_word, 'UTF-8')
        : strtolower($stop_word);
    $normalized = str_replace($search, $replace, $lowered);

    // Split on code points, not bytes (str_split() would cut multibyte characters).
    $chars = preg_split('//u', $normalized, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        throw new \InvalidArgumentException('Stop word is not valid UTF-8.');
    }

    $reg = '/';
    foreach ($chars as $char) {
        if (isset($letters[$char])) {
            $alternatives = [preg_quote($char, '/')];
            foreach ($letters[$char] as $variant) {
                $alternatives[] = preg_quote($variant, '/');
            }
            // \p{M}* swallows combining marks that follow the base letter.
            $reg .= '(?:' . implode('|', $alternatives) . ')\p{M}*';
        } else {
            $reg .= preg_quote($char, '/');
        }
    }

    // "u" makes the pattern and subject UTF-8 so "i" also folds accented letters.
    return $reg . '/iu';
}

$subst = '-';
// For each stop word, build its pattern and mask every occurrence in $str.
foreach ($stopwords as $stop_word) {
    // An empty word would produce "//iu", which matches at every position.
    if ($stop_word === '') {
        continue;
    }
    $reg = build_stopword_regex($stop_word, $letters);
    echo "<br><b>$reg</b><br>";
    $replaced = preg_replace($reg, $subst, $str);
    // preg_replace() returns null on a PCRE error; keep the previous text then.
    if ($replaced !== null) {
        $str = $replaced;
    }
    echo "The result of the substitution is " . $str;
}
To test the regex online: https://regex101.com/
Comments