How can I match accented variants of letters in order to sanitize selected words in a text?
Example:
/**
 * Build a case- and accent-insensitive PCRE pattern for a single stop word.
 *
 * The word is lower-cased, its accented letters are folded to their base
 * letter using $letters, and every base letter that has variants is expanded
 * to a group of all its spellings, optionally followed by combining marks
 * (so a decomposed "o" + U+0302 matches as well as the precomposed "ô").
 * All literal characters go through preg_quote(), so regex metacharacters in
 * a stop word are matched literally.
 *
 * The pattern is not anchored: it also matches inside longer words.
 *
 * @param string                      $stop_word Word to match (UTF-8).
 * @param array<string, list<string>> $letters   Base letter => accented variants.
 *
 * @return string Delimited pattern with the "iu" modifiers, or '' when the
 *                word is empty or is not valid UTF-8 (nothing to match).
 */
function build_stopword_regex(string $stop_word, array $letters): string
{
    // Flatten the map into parallel search/replace lists for str_replace().
    $search = [];
    $replace = [];
    foreach ($letters as $base => $variants) {
        foreach ($variants as $variant) {
            $search[] = $variant;
            $replace[] = (string) $base;
        }
    }

    // strtolower() only handles ASCII; use the multibyte version when available.
    $lowered = function_exists('mb_strtolower')
        ? mb_strtolower($stop_word, 'UTF-8')
        : strtolower($stop_word);
    $normalized = str_replace($search, $replace, $lowered);

    // Split into UTF-8 characters (str_split() would cut multibyte letters apart).
    $chars = preg_split('//u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    if ($chars === []) {
        return '';
    }

    $body = '';
    foreach ($chars as $char) {
        if (isset($letters[$char])) {
            $alternatives = [preg_quote($char, '/')];
            foreach ($letters[$char] as $variant) {
                $alternatives[] = preg_quote($variant, '/');
            }
            // Any spelling of the letter, then any number of combining marks.
            $body .= '(?:' . implode('|', $alternatives) . ')\p{M}*';
        } else {
            $body .= preg_quote($char, '/');
        }
    }

    // "u" is required for Unicode-aware case folding and \p{M}.
    return '/' . $body . '/iu';
}

$stopwords = ['toto', 'Pied', 'the'];
$str = 'bla bla toto bla bla Toto bla bla Tôtô bla bla totô bla bla bla bla the thé pied pïed pîéd';

// Base letter => accented variants that must be treated as that letter.
$letters = [
    'a' => ['â', 'à'],
    'c' => ['ç'],
    'e' => ['é', 'ê', 'è', 'ë'],
    'i' => ['î', 'ï'],
    'o' => ['ô'],
    'u' => ['û', 'ù', 'ü'],
];

// Parallel search/replace lists, derived from $letters so they cannot drift apart.
$s = [];
$r = [];
foreach ($letters as $base_letter => $variants) {
    foreach ($variants as $variant) {
        $s[] = $variant;
        $r[] = $base_letter;
    }
}

$subst = '-';

// Replace every occurrence of each stop word, whatever its case or accents.
foreach ($stopwords as $stop_word) {
    $reg = build_stopword_regex($stop_word, $letters);
    if ($reg === '') {
        // An empty pattern would match between every character of the text.
        continue;
    }
    echo "<br><b>$reg</b><br>";

    $result = preg_replace($reg, $subst, $str);
    if ($result !== null) {
        // preg_replace() returns null on error; keep the previous text then.
        $str = $result;
    }
    echo "The result of the substitution is " . $str;
}
To test the regex online: https://regex101.com/
Comments