1 <?php
2 namespace apemsel\AttributedString;
3
4 5 6 7 8 9 10
/**
 * A UTF-8 string whose characters can carry named attributes.
 *
 * Every attribute is stored as a "map": a list with one entry per character
 * (index = character offset, not byte offset) holding the state of that
 * attribute at that character. All offsets in the public API are character
 * offsets. The string itself cannot be modified after construction; only the
 * attribute maps change.
 *
 * The code deliberately avoids syntax newer than PHP 5.4 so it keeps running
 * on the PHP versions the original class supported.
 */
class AttributedString implements \Countable, \ArrayAccess
{
    /** @var string The wrapped native string (handled as UTF-8). */
    protected $string;

    /** @var array Attribute name => list of per-character states. */
    protected $attributes = [];

    /** @var int Length of the string in characters. */
    protected $length;

    /** @var array Cache mapping byte offsets to character offsets. */
    protected $byteToChar = [];

    /**
     * Wraps a native string, or copies another AttributedString including
     * its attributes and offset cache.
     *
     * @param string|AttributedString $string
     * @throws \InvalidArgumentException if $string is neither of the above
     */
    public function __construct($string)
    {
        if (is_string($string)) {
            $this->string = $string;
            $this->length = mb_strlen($string, "utf-8");
        } elseif ($string instanceof self) {
            $this->string = $string->string;
            $this->attributes = $string->attributes;
            $this->length = $string->length;
            $this->byteToChar = $string->byteToChar;
        } else {
            throw new \InvalidArgumentException("Expected a string or an AttributedString");
        }
    }

    /**
     * Returns the plain string without any attribute information.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->string;
    }

    /**
     * Creates a new attribute whose state is false for every character.
     *
     * @param string $attribute
     * @throws \InvalidArgumentException if the attribute already exists
     */
    public function createAttribute($attribute)
    {
        if ($this->hasAttribute($attribute)) {
            throw new \InvalidArgumentException("Attribute already exists");
        }

        // array_fill() rejects a zero count on old PHP versions, so the
        // empty string gets an explicit empty map.
        $this->attributes[$attribute] = $this->length > 0
            ? array_fill(0, $this->length, false)
            : [];
    }

    /**
     * Tells whether an attribute map with the given name exists.
     *
     * @param string $attribute
     * @return bool
     */
    public function hasAttribute($attribute)
    {
        return isset($this->attributes[$attribute]);
    }

    /**
     * Removes an attribute map; unknown attributes are ignored.
     *
     * @param string $attribute
     */
    public function deleteAttribute($attribute)
    {
        unset($this->attributes[$attribute]);
    }

    /**
     * Sets the state of an attribute for the characters $from..$to (inclusive).
     *
     * Bounds are clamped to the string and swapped if given in reverse order.
     * The attribute is created when it does not exist yet.
     *
     * @param int    $from      first character offset
     * @param int    $to        last character offset (inclusive)
     * @param string $attribute
     * @param mixed  $state     state to store, true by default
     */
    public function setRange($from, $to, $attribute, $state = true)
    {
        if (!$this->hasAttribute($attribute)) {
            $this->createAttribute($attribute);
        }

        // Nothing can be marked on an empty string; writing index 0 here
        // would make the map longer than the string.
        if ($this->length < 1) {
            return;
        }

        // The last valid index is length - 1. Clamping to length (as the
        // code used to) appended a phantom entry past the end of the map.
        $last = $this->length - 1;
        $from = max(0, min($from, $last));
        $to = max(0, min($to, $last));

        if ($from > $to) {
            list($from, $to) = [$to, $from];
        }

        $this->attributes[$attribute] = array_replace(
            $this->attributes[$attribute],
            array_fill($from, $to - $from + 1, $state)
        );
    }

    /**
     * Sets the state of an attribute for $length characters starting at $from.
     *
     * A length below 1 marks nothing (the attribute is still created so later
     * lookups behave the same as after a normal call).
     *
     * @param int    $from      first character offset
     * @param int    $length    number of characters
     * @param string $attribute
     * @param mixed  $state     state to store, true by default
     */
    public function setLength($from, $length, $attribute, $state = true)
    {
        if ($length < 1) {
            // $from + $length - 1 would lie before $from, and setRange()
            // would swap the bounds and mark characters nobody asked for.
            if (!$this->hasAttribute($attribute)) {
                $this->createAttribute($attribute);
            }

            return;
        }

        $this->setRange($from, $from + $length - 1, $attribute, $state);
    }

    /**
     * Sets the state of an attribute for every match of a regular expression.
     *
     * @param string $pattern   PCRE pattern including delimiters
     * @param string $attribute
     * @param mixed  $state     state to store, true by default
     * @return int|false number of full matches (0 if none), or false when
     *                   preg_match_all() reports an error
     */
    public function setPattern($pattern, $attribute, $state = true)
    {
        $count = preg_match_all($pattern, $this->string, $matches, PREG_OFFSET_CAPTURE);

        if (!$count) {
            return $count;
        }

        foreach ($matches[0] as $match) {
            list($text, $byteOffset) = $match;
            $chars = mb_strlen($text, "utf-8");

            // A zero-length match covers no character at all.
            if ($chars < 1) {
                continue;
            }

            // PREG_OFFSET_CAPTURE reports byte offsets; maps use characters.
            $start = $this->byteToCharOffset($byteOffset);
            $this->setRange($start, $start + $chars - 1, $attribute, $state);
        }

        return $count;
    }

    /**
     * Sets the state of an attribute for occurrences of a substring.
     *
     * @param string $substring text to look for; an empty string matches nothing
     * @param string $attribute
     * @param bool   $all       mark every occurrence (true) or only the first
     * @param bool   $matchCase compare case-sensitively
     * @param mixed  $state     state to store, true by default
     */
    public function setSubstring($substring, $attribute, $all = true, $matchCase = true, $state = true)
    {
        $length = mb_strlen($substring, "utf-8");

        // PHP 8 lets mb_strpos() find "" at every offset, which would keep
        // this loop at the same position forever.
        if ($length < 1) {
            return;
        }

        $find = $matchCase ? "mb_strpos" : "mb_stripos";
        $offset = 0;

        while (false !== ($pos = $find($this->string, $substring, $offset, "utf-8"))) {
            $this->setRange($pos, $pos + $length - 1, $attribute, $state);

            if (!$all) {
                return;
            }

            $offset = $pos + $length;
        }
    }

    /**
     * Finds the first character at or after $offset whose attribute state
     * equals $state.
     *
     * @param string $attribute
     * @param int    $offset       character offset to start searching at
     * @param bool   $returnLength also return the length of the run found
     * @param mixed  $state        state to look for, true by default
     * @param bool   $strict       compare states with === instead of ==
     * @return int|int[]|false position, or [position, length] when
     *                         $returnLength is set; false if nothing is found
     *                         or the attribute does not exist
     */
    public function searchAttribute($attribute, $offset = 0, $returnLength = false, $state = true, $strict = true)
    {
        if (!$this->hasAttribute($attribute)) {
            return false;
        }

        $map = $this->attributes[$attribute];

        // Keys are preserved so the result is an offset into the whole string.
        $haystack = $offset ? array_slice($map, $offset, null, true) : $map;
        $pos = array_search($state, $haystack, $strict);

        if (!$returnLength || false === $pos) {
            return $pos;
        }

        // The run ends at the first entry equal to the negated state.
        // Slicing the full map at $pos is independent of how $offset was given.
        $end = array_search(!$state, array_slice($map, $pos), $strict);
        $length = $end ? $end : $this->length - $pos;

        return [$pos, $length];
    }

    /**
     * Tells whether the attribute is set (truthy) at the given position.
     *
     * @param string $attribute
     * @param int    $pos       character offset
     * @return bool false as well for unknown attributes or positions
     */
    public function is($attribute, $pos)
    {
        return !empty($this->attributes[$attribute][$pos]);
    }

    /**
     * Collects every run of characters whose attribute state equals $state.
     *
     * @param string $attribute
     * @param int    $offset    character offset to start at
     * @param mixed  $state     state to look for, true by default
     * @param bool   $strict    compare states with === instead of ==
     * @return string[] the runs in order of appearance
     */
    public function substrings($attribute, $offset = 0, $state = true, $strict = true)
    {
        $substrings = [];

        while (false !== ($run = $this->searchAttribute($attribute, $offset, true, $state, $strict))) {
            list($start, $length) = $run;
            $substrings[] = mb_substr($this->string, $start, $length, "UTF-8");
            // Continue right behind the run that was just collected.
            $offset = $start + $length;
        }

        return $substrings;
    }

    /**
     * Joins every run returned by substrings() into one string.
     *
     * @param string $attribute
     * @param int    $offset    character offset to start at
     * @param mixed  $state     state to look for, true by default
     * @param bool   $strict    compare states with === instead of ==
     * @param string $glue      separator placed between the runs
     * @return string
     */
    public function filter($attribute, $offset = 0, $state = true, $strict = true, $glue = "")
    {
        return implode($glue, $this->substrings($attribute, $offset, $state, $strict));
    }

    /**
     * Lists the attributes that are set (truthy) at the given position.
     *
     * @param int $pos character offset
     * @return array attribute names; empty for positions outside the string
     */
    public function attributesAt($pos)
    {
        $attributes = [];

        foreach ($this->attributes as $attribute => $map) {
            if (!empty($map[$pos])) {
                $attributes[] = $attribute;
            }
        }

        return $attributes;
    }

    /**
     * Renders the string as HTML, wrapping each attributed run in
     * <$tag class="$classPrefix$attribute">.
     *
     * Overlapping attributes are kept well-nested: when an attribute ends
     * while others opened after it are still active, those are closed and
     * reopened. Neither the text nor the class names are HTML-escaped, so
     * callers have to escape untrusted content themselves.
     *
     * @param string $tag         element name used for the wrappers
     * @param string $classPrefix prefix put in front of each attribute name
     * @return string
     */
    public function toHtml($tag = "span", $classPrefix = "")
    {
        // Whether each attribute is currently "open" while walking the string.
        $state = [];
        foreach (array_keys($this->attributes) as $attribute) {
            $state[$attribute] = false;
        }

        $html = "";
        $stack = [];   // attributes with an open tag, innermost last
        $lastPos = 0;  // first character not yet copied to $html

        for ($i = 0; $i < $this->length; $i++) {
            foreach ($this->attributes as $attribute => $map) {
                $active = !empty($map[$i]);

                // Only a change in truthiness opens or closes a tag.
                if ($active === $state[$attribute]) {
                    continue;
                }

                $state[$attribute] = $active;

                // Flush the text collected since the last tag boundary.
                $html .= mb_substr($this->string, $lastPos, $i - $lastPos, "utf-8");
                $lastPos = $i;

                if ($active) {
                    $html .= "<$tag class=\"$classPrefix$attribute\">";
                    $stack[] = $attribute;
                    continue;
                }

                // Close tags down to and including the ending attribute and
                // remember the ones that were only closed to keep nesting valid.
                // Comparing against null keeps falsy names like "0" working.
                $stashed = [];
                while (null !== ($open = array_pop($stack))) {
                    $html .= "</$tag>";
                    if ($open === $attribute) {
                        break;
                    }
                    $stashed[] = $open;
                }

                // Reopen the attributes that are still active.
                foreach ($stashed as $reopen) {
                    $stack[] = $reopen;
                    $html .= "<$tag class=\"$classPrefix$reopen\">";
                }
            }
        }

        // Remaining text, then close whatever is still open.
        $html .= mb_substr($this->string, $lastPos, $this->length - $lastPos, 'utf-8');
        $html .= str_repeat("</$tag>", count($stack));

        return $html;
    }

    /**
     * Combines attribute maps character by character with a boolean operator.
     *
     * @param string       $op         "or", "xor", "and" or "not" (case-insensitive)
     * @param string       $attribute1 first operand
     * @param string|false $attribute2 second operand; ignored for "not"
     * @param string|false $to         attribute receiving the result; when
     *                                 omitted the result replaces $attribute1
     * @throws \InvalidArgumentException for a missing operand attribute or an
     *                                   unknown operation
     */
    public function combineAttributes($op, $attribute1, $attribute2 = false, $to = false)
    {
        // The default is false, which isset() reports as "set"; the result
        // used to end up under the array key 0 instead of $attribute1.
        if ($to === false || $to === null) {
            $to = $attribute1;
        }

        $op = strtolower($op);

        if ($op === "not") {
            $attribute2 = $attribute1;
        }

        if (!$this->hasAttribute($attribute1) || !$this->hasAttribute($attribute2)) {
            throw new \InvalidArgumentException("Attribute does not exist");
        }

        // Reject unknown operations before touching any attribute map.
        if (!in_array($op, ['or', 'xor', 'and', 'not'], true)) {
            throw new \InvalidArgumentException("Unknown operation");
        }

        $map1 = $this->attributes[$attribute1];
        $map2 = $this->attributes[$attribute2];
        $result = [];

        for ($i = 0; $i < $this->length; $i++) {
            $a = !empty($map1[$i]);
            $b = !empty($map2[$i]);

            switch ($op) {
                case 'or':
                    $result[$i] = $a || $b;
                    break;

                case 'xor':
                    $result[$i] = ($a xor $b);
                    break;

                case 'and':
                    $result[$i] = $a && $b;
                    break;

                default: // 'not'
                    $result[$i] = !$a;
                    break;
            }
        }

        $this->attributes[$to] = $result;
    }

    /**
     * Renders an attribute map as a string with one symbol per character.
     *
     * @param string $attribute
     * @param string $true      symbol for characters where the attribute is set
     * @param string $false     symbol for all other characters
     * @return string
     * @throws \InvalidArgumentException if the attribute does not exist
     */
    public function attributeToString($attribute, $true = "-", $false = " ")
    {
        if (!$this->hasAttribute($attribute)) {
            throw new \InvalidArgumentException("Attribute does not exist");
        }

        return implode("", array_map(function ($v) use ($true, $false) {
            return $v ? $true : $false;
        }, $this->attributes[$attribute]));
    }

    /**
     * Precomputes the byte offset => character offset cache for the whole
     * string, replacing whatever the cache held before.
     *
     * @throws \InvalidArgumentException if the string contains a byte that
     *                                   cannot start a UTF-8 sequence
     */
    public function enablebyteToCharCache()
    {
        $this->byteToChar = [0 => 0];
        $bytes = strlen($this->string);
        $char = 0;

        for ($i = 0; $i < $bytes; ) {
            $i += self::utf8CharLen($this->string[$i]);
            // $i now points just behind character number $char.
            $this->byteToChar[$i] = ++$char;
        }
    }

    /**
     * Converts a byte offset into a character offset, caching the result.
     *
     * @param int $boff byte offset
     * @return int character offset
     */
    protected function byteToCharOffset($boff)
    {
        if (isset($this->byteToChar[$boff])) {
            return $this->byteToChar[$boff];
        }

        return $this->byteToChar[$boff] = self::byteToCharOffsetString($this->string, $boff);
    }

    /**
     * Converts a character offset into a byte offset and stores the reverse
     * mapping in the cache if it is not there yet.
     *
     * @param int $char character offset
     * @return int byte offset
     */
    protected function charToByteOffset($char)
    {
        $byte = strlen(mb_substr($this->string, 0, $char, "utf-8"));

        if (!isset($this->byteToChar[$byte])) {
            $this->byteToChar[$byte] = $char;
        }

        return $byte;
    }

    /**
     * Counts the characters that start before byte offset $boff.
     *
     * @param string $string UTF-8 string
     * @param int    $boff   byte offset; values past the end are treated as
     *                       the end of the string
     * @return int character offset
     */
    protected static function byteToCharOffsetString($string, $boff)
    {
        // Never index past the end of the string.
        $limit = min($boff, strlen($string));
        $chars = 0;

        for ($i = 0; $i < $limit; $chars++) {
            $i += self::utf8CharLen($string[$i]);
        }

        return $chars;
    }

    /**
     * Returns the length in bytes of the UTF-8 sequence introduced by $byte.
     *
     * A stray continuation byte (10xxxxxx) counts as a one-byte character.
     *
     * @param string $byte first byte of the sequence
     * @return int 1 to 4
     * @throws \InvalidArgumentException for 0xF8..0xFF, which cannot start a
     *                                   UTF-8 sequence
     */
    protected static function utf8CharLen($byte)
    {
        $code = ord($byte);

        if ($code < 0xC0) {
            return 1; // 0xxxxxxx (ASCII) or 10xxxxxx (continuation byte)
        }
        if ($code < 0xE0) {
            return 2; // 110xxxxx
        }
        if ($code < 0xF0) {
            return 3; // 1110xxxx
        }
        if ($code < 0xF8) {
            return 4; // 11110xxx
        }

        throw new \InvalidArgumentException("Invalid UTF-8 lead byte");
    }

    /**
     * Countable: length of the string in characters.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return $this->length;
    }

    /**
     * ArrayAccess: tells whether a character exists at the given offset.
     *
     * Negative offsets count from the end of the string, matching what
     * offsetGet() returns for them.
     *
     * @param int $offset character offset
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return is_numeric($offset)
            && $offset >= -$this->length
            && $offset < $this->length;
    }

    /**
     * ArrayAccess: returns the character at the given offset.
     *
     * @param int $offset character offset
     * @return string one character, or "" when mb_substr() finds none
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return mb_substr($this->string, $offset, 1, "utf-8");
    }

    /**
     * ArrayAccess: not supported, the string cannot be modified.
     *
     * @param int   $offset
     * @param mixed $value
     * @throws \InvalidArgumentException always
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        throw new \InvalidArgumentException("AttributedString is immutable");
    }

    /**
     * ArrayAccess: not supported, the string cannot be modified.
     *
     * @param int $offset
     * @throws \InvalidArgumentException always
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        throw new \InvalidArgumentException("AttributedString is immutable");
    }
}
509