Create New Item
Item Type
File
Folder
Item Name
Search file in folder and subfolders...
Are you sure you want to rename?
gilour
/
vendor
/
jfcherng
/
php-mb-string
/
src
:
MbString.php
Advanced Search
Upload
New Item
Settings
Back
Back Up
Advanced Editor
Save
<?php

declare(strict_types=1);

namespace Jfcherng\Utility;

/**
 * An internal UTF-32 multi-bytes string class.
 *
 * Because UTF-8 is varied-width, mb_*() is kinda O(n) when doing decoding.
 * Using iconv() to make it UTF-32 and work with str*() can be possibly faster.
 *
 * UTF-32 is a fix-width encoding (1 char = 4 bytes).
 * Note that the first 4 bytes in a UTF-32 string is the header (endian bytes).
 *
 * All character indexes/lengths exposed by this class are converted to byte
 * offsets internally with "<< 2" (x4) and back with ">> 2" (/4).
 *
 * @author Jack Cherng <jfcherng@gmail.com>
 */
class MbString extends \ArrayObject
{
    /**
     * UTF-32 string without endian bytes.
     *
     * @var string
     */
    protected $str;

    /**
     * The original encoding.
     *
     * @var string
     */
    protected $encoding;

    /**
     * The endian bytes for UTF-32.
     *
     * @var string
     */
    protected static $utf32Header;

    /**
     * The constructor.
     *
     * @param string $str      the string
     * @param string $encoding the encoding
     */
    public function __construct(string $str = '', string $encoding = 'UTF-8')
    {
        // the header is detected once and shared by every instance
        static::$utf32Header = static::$utf32Header ?? static::getUtf32Header();

        $this->encoding = $encoding;
        $this->set($str);
    }

    /**
     * Returns a string representation of the object.
     *
     * @return string string representation of the object
     */
    public function __toString(): string
    {
        return $this->get();
    }

    /**
     * The string setter.
     *
     * @param string $str the string (in the original encoding)
     */
    public function set(string $str): self
    {
        $this->str = $this->inputConv($str);

        return $this;
    }

    /**
     * Set the character at the given index.
     *
     * Only the first character of $char is used. When $idx is beyond the
     * current end of the string, the gap is filled with spaces.
     *
     * @param int    $idx  the character index
     * @param string $char the character (in the original encoding)
     */
    public function setAt(int $idx, string $char): self
    {
        $char = $this->inputConv($char);
        if (\strlen($char) > 4) {
            // keep the first UTF-32 code unit only
            $char = \substr($char, 0, 4);
        }

        $gap = $idx - $this->strlen();

        if ($gap > 0) {
            // Out of bound: pad with spaces. The space is built from a known
            // encoding rather than from raw ' ' bytes in $this->encoding,
            // which would be wrong for non-ASCII-compatible encodings.
            $space = \substr(\iconv('UTF-8', 'UTF-32', ' '), \strlen(static::$utf32Header));
            $this->str .= \str_repeat($space, $gap) . $char;
        } else {
            // In bound (or exactly at the end): overwrite one code unit.
            $this->str = \substr_replace($this->str, $char, $idx << 2, 4);
        }

        return $this;
    }

    /**
     * The string getter.
     *
     * @return string the string in the original encoding
     */
    public function get(): string
    {
        return $this->outputConv($this->str);
    }

    /**
     * The raw string getter.
     *
     * @return string the UTF-32-encoded raw string
     */
    public function getRaw(): string
    {
        return $this->str;
    }

    /**
     * Get the character at the given index in the original encoding.
     *
     * @param int $idx the character index
     */
    public function getAt(int $idx): string
    {
        return $this->outputConv(\substr($this->str, $idx << 2, 4));
    }

    /**
     * Get the raw UTF-32 code unit (4 bytes) at the given index.
     *
     * @param int $idx the character index
     */
    public function getAtRaw(int $idx): string
    {
        return \substr($this->str, $idx << 2, 4);
    }

    /**
     * Split the string into an array of characters in the original encoding.
     *
     * @return string[] the characters
     */
    public function toArray(): array
    {
        // strToChars() uses a UTF-8 ("u" modifier) regex, so it can only be
        // applied directly when the original encoding is UTF-8.
        if (\strcasecmp($this->encoding, 'UTF-8') === 0) {
            return self::strToChars($this->get());
        }

        // Other encodings: convert every UTF-32 code unit on its own.
        return \array_map(
            function (string $unit): string {
                return $this->outputConv($unit);
            },
            $this->toArrayRaw()
        );
    }

    /**
     * Split the string (in the original encoding) with preg_split().
     *
     * @param string $regex the regex passed to preg_split()
     * @param int    $limit the limit passed to preg_split()
     * @param int    $flags the flags passed to preg_split()
     *
     * @return array the result of preg_split(), or an empty array for an empty string
     */
    public function toArraySplit(string $regex, int $limit = -1, $flags = 0): array
    {
        if ($this->str === '') {
            return [];
        }

        return \preg_split($regex, $this->get(), $limit, $flags);
    }

    /**
     * Split the string into an array of raw UTF-32 code units.
     *
     * @return string[] 4-byte strings, or an empty array for an empty string
     */
    public function toArrayRaw(): array
    {
        if ($this->str === '') {
            return [];
        }

        return \str_split($this->str, 4);
    }

    /**
     * Split a UTF-8 string into an array of characters.
     *
     * @param string $str the UTF-8 string
     *
     * @return string[] the characters, or an empty array when nothing matches
     */
    public static function strToChars(string $str): array
    {
        return \preg_match_all('/./suS', $str, $matches) ? $matches[0] : [];
    }

    ///////////////////////////////////
    // string manipulation functions //
    ///////////////////////////////////

    /**
     * Find the character position of the first case-insensitive occurrence.
     *
     * NOTE: case-insensitivity comes from PHP's byte-wise stripos() applied to
     * the UTF-32 bytes, so it is not Unicode case folding.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character offset to start searching from
     *
     * @return false|int the character position, or false when not found
     */
    public function stripos(string $needle, int $offset = 0)
    {
        $needle = $this->inputConv($needle);
        $pos = self::findAligned($this->str, $needle, $offset << 2, true);

        return \is_bool($pos) ? $pos : $pos >> 2;
    }

    /**
     * Get the length of the string in characters.
     */
    public function strlen(): int
    {
        return \strlen($this->str) >> 2;
    }

    /**
     * Find the character position of the first occurrence of the needle.
     *
     * @param string $needle the needle (in the original encoding)
     * @param int    $offset the character offset to start searching from
     *
     * @return false|int the character position, or false when not found
     */
    public function strpos(string $needle, int $offset = 0)
    {
        $needle = $this->inputConv($needle);
        $pos = self::findAligned($this->str, $needle, $offset << 2, false);

        return \is_bool($pos) ? $pos : $pos >> 2;
    }

    /**
     * Get a part of the string in the original encoding.
     *
     * @param int      $start  the starting character index
     * @param null|int $length the length in characters, null means "to the end"
     */
    public function substr(int $start = 0, ?int $length = null): string
    {
        $raw = $length === null
            ? \substr($this->str, $start << 2)
            : \substr($this->str, $start << 2, $length << 2);

        return $this->outputConv($raw);
    }

    /**
     * Get a copy of the string with a part replaced (the object is untouched).
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the starting character index
     * @param null|int $length      the length in characters, null means "to the end"
     */
    public function substr_replace(string $replacement, int $start = 0, ?int $length = null): string
    {
        $replacement = $this->inputConv($replacement);

        $raw = $length === null
            ? \substr_replace($this->str, $replacement, $start << 2)
            : \substr_replace($this->str, $replacement, $start << 2, $length << 2);

        return $this->outputConv($raw);
    }

    /**
     * Get the string lower-cased by PHP's native (non multi-byte) strtolower().
     */
    public function strtolower(): string
    {
        return \strtolower($this->get());
    }

    /**
     * Get the string upper-cased by PHP's native (non multi-byte) strtoupper().
     */
    public function strtoupper(): string
    {
        return \strtoupper($this->get());
    }

    ////////////////////////////////
    // non-manipulative functions //
    ////////////////////////////////

    /**
     * Determine whether the string contains the needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function has(string $needle): bool
    {
        $needle = $this->inputConv($needle);

        return self::findAligned($this->str, $needle, 0, false) !== false;
    }

    /**
     * Determine whether the string starts with the needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function startsWith(string $needle): bool
    {
        $needle = $this->inputConv($needle);

        return $needle === \substr($this->str, 0, \strlen($needle));
    }

    /**
     * Determine whether the string ends with the needle.
     *
     * @param string $needle the needle (in the original encoding)
     */
    public function endsWith(string $needle): bool
    {
        $needle = $this->inputConv($needle);
        $length = \strlen($needle);

        // an empty needle always matches; substr($str, -0) would not do that
        return $length === 0 ? true : $needle === \substr($this->str, -$length);
    }

    /////////////////////////////////////////////
    // those functions will not return a value //
    /////////////////////////////////////////////

    /**
     * Insert a string at the given character position (in place).
     *
     * @param string $insert   the string to insert (in the original encoding)
     * @param int    $position the character position
     */
    public function str_insert_i(string $insert, int $position): self
    {
        $insert = $this->inputConv($insert);
        $this->str = \substr_replace($this->str, $insert, $position << 2, 0);

        return $this;
    }

    /**
     * Enclose a part of the string with an opening and a closing string (in place).
     *
     * @param string[] $closures the enclosing strings, ex: ['{', '}'];
     *                           a single element is used for both sides
     * @param int      $start    the starting character index
     * @param null|int $length   the length in characters, null means "to the end"
     */
    public function str_enclose_i(array $closures, int $start = 0, ?int $length = null): self
    {
        // ex: $closures = array('{', '}');
        foreach ($closures as &$closure) {
            $closure = $this->inputConv($closure);
        }
        unset($closure);

        if (\count($closures) < 2) {
            $closures[0] = $closures[1] = \reset($closures);
        }

        if ($length === null) {
            $replacement = $closures[0] . \substr($this->str, $start << 2) . $closures[1];
            $this->str = \substr_replace($this->str, $replacement, $start << 2);
        } else {
            $replacement = $closures[0] . \substr($this->str, $start << 2, $length << 2) . $closures[1];
            $this->str = \substr_replace($this->str, $replacement, $start << 2, $length << 2);
        }

        return $this;
    }

    /**
     * Replace all occurrences of the search string (in place).
     *
     * Occurrences are matched left to right without overlapping, and only on
     * character boundaries, so a match can never straddle two characters.
     *
     * @param string $search  the string to search for (in the original encoding)
     * @param string $replace the replacement (in the original encoding)
     */
    public function str_replace_i(string $search, string $replace): self
    {
        $search = $this->inputConv($search);
        $replace = $this->inputConv($replace);

        // like str_replace(), an empty search string changes nothing
        if ($search === '') {
            return $this;
        }

        $searchLength = \strlen($search);
        $result = '';
        $cursor = 0;

        while (($pos = self::findAligned($this->str, $search, $cursor, false)) !== false) {
            $result .= \substr($this->str, $cursor, $pos - $cursor) . $replace;
            $cursor = $pos + $searchLength;
        }

        $this->str = $result . \substr($this->str, $cursor);

        return $this;
    }

    /**
     * Replace a part of the string (in place).
     *
     * @param string   $replacement the replacement (in the original encoding)
     * @param int      $start       the starting character index
     * @param null|int $length      the length in characters, null means "to the end"
     */
    public function substr_replace_i(string $replacement, int $start = 0, ?int $length = null): self
    {
        $replacement = $this->inputConv($replacement);

        $this->str = $length === null
            ? \substr_replace($this->str, $replacement, $start << 2)
            : \substr_replace($this->str, $replacement, $start << 2, $length << 2);

        return $this;
    }

    /////////////////
    // ArrayObject //
    /////////////////

    /**
     * Handle "$obj[$idx] = $char" and "$obj[] = $str".
     *
     * @param null|int $idx  the character index, null when appending with "[]"
     * @param string   $char the character (or the string to append)
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($idx, $char): void
    {
        // "$obj[] = ..." passes a null index, which setAt(int) cannot accept
        if ($idx === null) {
            $this->append($char);

            return;
        }

        $this->setAt($idx, $char);
    }

    /**
     * Handle "$obj[$idx]".
     *
     * @param int $idx the character index
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($idx): string
    {
        return $this->getAt($idx);
    }

    /**
     * Handle "isset($obj[$idx])".
     *
     * @param mixed $idx the character index; non-integers never exist
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($idx): bool
    {
        return \is_int($idx) ? $this->strlen() > $idx : false;
    }

    /**
     * Append a string to the end.
     *
     * @param string $str the string (in the original encoding)
     */
    #[\ReturnTypeWillChange]
    public function append($str): void
    {
        $this->str .= $this->inputConv($str);
    }

    /**
     * Get the number of characters.
     */
    public function count(): int
    {
        return $this->strlen();
    }

    ////////////////////
    // misc functions //
    ////////////////////

    /**
     * Gets the utf 32 header.
     *
     * @return string the UTF-32 header or empty string
     */
    protected static function getUtf32Header(): string
    {
        // just use any string to get the endian header, here we use "A"
        $tmp = \iconv('UTF-8', 'UTF-32', 'A');

        // some distributions like "php alpine" docker image won't generate the header
        return $tmp && \strlen($tmp) > 4 ? \substr($tmp, 0, 4) : '';
    }

    /**
     * Convert the output string to its original encoding.
     *
     * @param string $str The string
     */
    protected function outputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return \iconv('UTF-32', $this->encoding, static::$utf32Header . $str);
    }

    /**
     * Convert the input string to UTF-32 without header.
     *
     * @param string $str The string
     */
    protected function inputConv(string $str): string
    {
        if ($str === '') {
            return '';
        }

        return \substr(\iconv($this->encoding, 'UTF-32', $str), \strlen(static::$utf32Header));
    }

    /**
     * Byte-level strpos()/stripos() that only accepts matches starting on a
     * 4-byte (one UTF-32 code unit) boundary.
     *
     * A plain byte search can match in the middle of a code unit, e.g. the
     * bytes of "A" can appear across two neighbouring characters.
     *
     * @param string $haystack   the raw UTF-32 haystack
     * @param string $needle     the raw UTF-32 needle
     * @param int    $byteOffset the byte offset to start searching from
     * @param bool   $ignoreCase use stripos() rather than strpos()
     *
     * @return false|int the byte position, or false when not found
     */
    private static function findAligned(string $haystack, string $needle, int $byteOffset, bool $ignoreCase)
    {
        $pos = $ignoreCase
            ? \stripos($haystack, $needle, $byteOffset)
            : \strpos($haystack, $needle, $byteOffset);

        while ($pos !== false && ($pos & 3) !== 0) {
            // misaligned hit: resume from the next code unit boundary
            $next = ($pos | 3) + 1;
            $pos = $ignoreCase
                ? \stripos($haystack, $needle, $next)
                : \strpos($haystack, $needle, $next);
        }

        return $pos;
    }
}