['FE80', 'FE80', 'FE80', 'FE80'], // Hamza '0622' => ['FE81', 'FE82', 'FE81', 'FE82'], // Alef with Madda '0623' => ['FE83', 'FE84', 'FE83', 'FE84'], // Alef with Hamza Above '0624' => ['FE85', 'FE86', 'FE85', 'FE86'], // Waw with Hamza Above '0625' => ['FE87', 'FE88', 'FE87', 'FE88'], // Alef with Hamza Below '0626' => ['FE89', 'FE8A', 'FE8B', 'FE8C'], // Yeh with Hamza Above '0627' => ['FE8D', 'FE8E', 'FE8D', 'FE8E'], // Alef '0628' => ['FE8F', 'FE90', 'FE91', 'FE92'], // Beh '0629' => ['FE93', 'FE94', 'FE93', 'FE94'], // Teh Marbuta '062A' => ['FE95', 'FE96', 'FE97', 'FE98'], // Teh '062B' => ['FE99', 'FE9A', 'FE9B', 'FE9C'], // Theh '062C' => ['FE9D', 'FE9E', 'FE9F', 'FEA0'], // Jeem '062D' => ['FEA1', 'FEA2', 'FEA3', 'FEA4'], // Hah '062E' => ['FEA5', 'FEA6', 'FEA7', 'FEA8'], // Khah '062F' => ['FEA9', 'FEAA', 'FEA9', 'FEAA'], // Dal '0630' => ['FEAB', 'FEAC', 'FEAB', 'FEAC'], // Thal '0631' => ['FEAD', 'FEAE', 'FEAD', 'FEAE'], // Reh '0632' => ['FEAF', 'FEB0', 'FEAF', 'FEB0'], // Zain '0633' => ['FEB1', 'FEB2', 'FEB3', 'FEB4'], // Seen '0634' => ['FEB5', 'FEB6', 'FEB7', 'FEB8'], // Sheen '0635' => ['FEB9', 'FEBA', 'FEBB', 'FEBC'], // Sad '0636' => ['FEBD', 'FEBE', 'FEBF', 'FEC0'], // Dad '0637' => ['FEC1', 'FEC2', 'FEC3', 'FEC4'], // Tah '0638' => ['FEC5', 'FEC6', 'FEC7', 'FEC8'], // Zah '0639' => ['FEC9', 'FECA', 'FECB', 'FECC'], // Ain '063A' => ['FECD', 'FECE', 'FECF', 'FED0'], // Ghain '0640' => ['0640', '0640', '0640', '0640'], // Tatweel '0641' => ['FED1', 'FED2', 'FED3', 'FED4'], // Feh '0642' => ['FED5', 'FED6', 'FED7', 'FED8'], // Qaf '0643' => ['FED9', 'FEDA', 'FEDB', 'FEDC'], // Kaf '0644' => ['FEDD', 'FEDE', 'FEDF', 'FEE0'], // Lam '0645' => ['FEE1', 'FEE2', 'FEE3', 'FEE4'], // Meem '0646' => ['FEE5', 'FEE6', 'FEE7', 'FEE8'], // Noon '0647' => ['FEE9', 'FEEA', 'FEEB', 'FEEC'], // Heh '0648' => ['FEED', 'FEEE', 'FEED', 'FEEE'], // Waw '0649' => ['FEEF', 'FEF0', 'FEEF', 'FEF0'], // Alef Maksura '064A' => ['FEF1', 'FEF2', 'FEF3', 'FEF4'], // Yeh ]; 
/**
 * Hex code points of dual-joining letters, plus Teh Marbuta (0629) and
 * Tatweel (0640).
 *
 * NOTE(review): no method in this section reads this list; confirm whether
 * anything else still depends on it before removing it.
 */
private $connects_before = [
    '0626', '0628', '0629', '062A', '062B', '062C', '062D', '062E',
    '0633', '0634', '0635', '0636', '0637', '0638', '0639', '063A', '0640', '0641', '0642',
    '0643', '0644', '0645', '0646', '0647', '064A',
];

/**
 * Letters that never join to the letter that FOLLOWS them (non-joining
 * Hamza and the right-joining letters). The letter after one of these is
 * shaped as if it started a new joining run.
 *
 * Teh Marbuta (0629) is right-joining as well, so it belongs here; without
 * it the letter after a Teh Marbuta was given a final/medial glyph.
 */
private $disconnects_after = [
    '0621', '0622', '0623', '0624', '0625', '0627', '0629',
    '062F', '0630', '0631', '0632', '0648', '0649',
];

/**
 * Shape a logical-order UTF-8 string into Arabic presentation forms and
 * reverse it, so that a left-to-right-only renderer draws it correctly.
 *
 * Every letter found in $this->presentation_forms is replaced by its
 * isolated / final / initial / medial glyph (table indexes 0 / 1 / 2 / 3)
 * depending on whether its neighbours join to it. Lam immediately followed
 * by an Alef variant collapses into a single Lam-Alef ligature glyph.
 * Characters that are not in the table are passed through unchanged.
 *
 * Harakat (see isTransparent()) are ignored when looking for the
 * neighbouring letters, so vocalised text joins like unvocalised text.
 *
 * Limitation: the WHOLE string is reversed, so embedded Latin words and
 * digit runs come out mirrored as well.
 *
 * @param string $str UTF-8 text in logical (typing) order.
 * @return string Shaped text in reversed (visual) order, UTF-8.
 */
public function utf8Glyphs($str)
{
    $hex = $this->utf8ToHexArray($str);
    $len = count($hex);
    $res = [];

    for ($i = 0; $i < $len; $i++) {
        $current = $hex[$i];

        // Anything without presentation forms (spaces, Latin, digits,
        // harakat, ...) is copied through untouched.
        if (!isset($this->presentation_forms[$current])) {
            $res[] = $current;
            continue;
        }

        // Does the closest preceding letter reach forward to this one?
        $prev = $this->neighbourLetter($hex, $i, -1);
        $connect_prev = $prev !== null && $this->canConnect($prev);

        // Lam + Alef variant => one ligature glyph. Only the directly
        // following code point is considered, as before.
        if ($current === '0644' && $i < $len - 1) {
            $ligature = $this->lamAlefLigature($hex[$i + 1]);
            if ($ligature !== null) {
                $res[] = $ligature[$connect_prev ? 1 : 0];
                $i++; // the Alef has been consumed by the ligature
                continue;
            }
        }

        // This letter joins forward only if it can reach forward AND the
        // next letter accepts a join from its right-hand side. Hamza does
        // not, so the letter before a Hamza must stay final/isolated.
        $next = $this->neighbourLetter($hex, $i, 1);
        $connect_next = $next !== null
            && $this->canConnectLeft($current)
            && $this->joinsToPrevious($next);

        // 0 = isolated, 1 = final, 2 = initial, 3 = medial.
        $form = ($connect_prev ? 1 : 0) + ($connect_next ? 2 : 0);

        $res[] = $this->presentation_forms[$current][$form];
    }

    return $this->mb_strrev($this->hexArrayToUtf8($res));
}

/**
 * Return the [isolated, final] glyph pair of the Lam-Alef ligature for the
 * given Alef variant, or null when $alef does not form a ligature with Lam.
 *
 * @param string $alef Hex code point of the character following the Lam.
 * @return array|null
 */
private function lamAlefLigature($alef)
{
    $ligatures = [
        '0622' => ['FEF5', 'FEF6'], // Lam-Alef with Madda
        '0623' => ['FEF7', 'FEF8'], // Lam-Alef with Hamza Above
        '0625' => ['FEF9', 'FEFA'], // Lam-Alef with Hamza Below
        '0627' => ['FEFB', 'FEFC'], // Lam-Alef
    ];

    return isset($ligatures[$alef]) ? $ligatures[$alef] : null;
}

/**
 * Find the nearest code point before ($step = -1) or after ($step = 1)
 * position $index that is not a transparent combining mark.
 *
 * @param array $hex   List of hex code-point strings.
 * @param int   $index Position to start from (exclusive).
 * @param int   $step  -1 to look backwards, 1 to look forwards.
 * @return string|null Hex code point, or null when the string ends first.
 */
private function neighbourLetter($hex, $index, $step)
{
    $len = count($hex);
    for ($j = $index + $step; $j >= 0 && $j < $len; $j += $step) {
        if (!$this->isTransparent($hex[$j])) {
            return $hex[$j];
        }
    }

    return null;
}

/**
 * Whether $hex is an Arabic combining mark that must not interrupt joining:
 * U+064B..U+065F (tanween, harakat, shadda, sukun and related marks) and
 * U+0670 (superscript Alef).
 *
 * @param string $hex Hex code point.
 * @return bool
 */
private function isTransparent($hex)
{
    $cp = hexdec($hex);

    return ($cp >= 0x064B && $cp <= 0x065F) || $cp === 0x0670;
}

/**
 * Can $hex join to the letter that FOLLOWS it?
 *
 * True when it has presentation forms and is not listed in
 * $disconnects_after (which already contains Hamza, 0621).
 *
 * @param string $hex Hex code point.
 * @return bool
 */
private function canConnect($hex)
{
    return isset($this->presentation_forms[$hex])
        && !in_array($hex, $this->disconnects_after, true);
}

/**
 * Alias of canConnect(), kept for existing callers.
 *
 * @param string $hex Hex code point.
 * @return bool
 */
private function canConnectLeft($hex)
{
    return $this->canConnect($hex);
}

/**
 * Can $hex accept a join from the letter BEFORE it? Every letter in the
 * forms table does, except the non-joining Hamza (0621).
 *
 * @param string $hex Hex code point.
 * @return bool
 */
private function joinsToPrevious($hex)
{
    return isset($this->presentation_forms[$hex]) && $hex !== '0621';
}

/**
 * Whether $hex has an entry in the presentation-forms table.
 *
 * @param string $hex Hex code point.
 * @return bool
 */
private function isArabic($hex)
{
    return isset($this->presentation_forms[$hex]);
}

/**
 * Split a UTF-8 string into upper-case hex code points, zero-padded to at
 * least four digits (e.g. "0628").
 *
 * @param string $str UTF-8 text.
 * @return array List of hex strings, one per code point.
 */
private function utf8ToHexArray($str)
{
    $out = [];
    $len = mb_strlen($str, 'UTF-8');
    for ($i = 0; $i < $len; $i++) {
        $char = mb_substr($str, $i, 1, 'UTF-8');
        $out[] = sprintf('%04X', $this->uniord($char));
    }

    return $out;
}

/**
 * Inverse of utf8ToHexArray(): turn a list of hex code points back into a
 * UTF-8 string.
 *
 * @param array $arr List of hex code-point strings.
 * @return string
 */
private function hexArrayToUtf8($arr)
{
    $str = '';
    foreach ($arr as $hex) {
        $str .= $this->unichr(hexdec($hex));
    }

    return $str;
}

/**
 * Code point of a single UTF-8 character.
 *
 * The conversion goes through UTF-32 rather than UCS-2 so that characters
 * above U+FFFF (emoji, rare scripts) keep their real code point.
 *
 * @param string $u One UTF-8 encoded character.
 * @return int
 */
private function uniord($u)
{
    if (strlen($u) === 1) {
        return ord($u);
    }

    $bytes = mb_convert_encoding($u, 'UTF-32BE', 'UTF-8');
    if (!is_string($bytes) || strlen($bytes) < 4) {
        return 0; // empty or unconvertible input
    }

    $unpacked = unpack('N', substr($bytes, 0, 4));

    return $unpacked[1];
}

/**
 * UTF-8 encoding of a code point. Uses a 32-bit intermediate so that values
 * above 0xFFFF are not truncated.
 *
 * @param int $u Unicode code point.
 * @return string
 */
private function unichr($u)
{
    return mb_convert_encoding(pack('N', $u), 'UTF-8', 'UTF-32BE');
}

/**
 * Reverse a UTF-8 string.
 *
 * The string is reversed by grapheme cluster (\X), not by code point, so a
 * combining mark stays behind the letter it belongs to.
 *
 * @param string $str UTF-8 text.
 * @return string
 */
private function mb_strrev($str)
{
    // 0 matches (empty string) or a PCRE failure: nothing to reverse.
    if (!preg_match_all('/\X/u', $str, $ar)) {
        return $str;
    }

    return join('', array_reverse($ar[0]));
}
} // closes the enclosing class body opened above this section