Overview
  • Namespace
  • Class

Namespaces

  • apemsel
    • AttributedString

Classes

  • apemsel\AttributedString\AttributedString
  • apemsel\AttributedString\MutableAttributedString
  • apemsel\AttributedString\TokenizedAttributedString
  1 <?php
  2 namespace apemsel\AttributedString;
  3 
  4 /**
  5  * Basic class to work with attributed strings.
  6  *
  7  * Attributed strings are strings that can have multiple attributes per character of the string
  8  *
  9  * @author Adrian Pemsel <apemsel@gmail.com>
 10  */
 11 class AttributedString implements \Countable, \ArrayAccess
 12 {
 13   protected $string;
 14   protected $attributes = [];
 15   protected $length;
 16   protected $byteToChar;
 17   
 18   /**
 19    * @param string|AttributedString $string Either a simple string or another AttributedString to init the AttributedString
 20    */
 21   public function __construct($string) {
 22     if (is_string($string)) {
 23       $this->string = $string;
 24       $this->length = mb_strlen($string, "utf-8");
 25     }
 26     elseif ($string instanceof AttributedString) {
 27       $this->string = $string->string;
 28       $this->attributes = $string->attributes;
 29       $this->lenght = $string->length;
 30       $this->byteToChar = $string->byteToChar;
 31     }
 32     else {
 33       throw new \InvalidArgumentException();
 34     }
 35   }
 36   
 37   /**
 38    * Returns the native string
 39    *
 40    * @return string The native string representation of the AttributedString without attributes
 41    */
 42   public function __toString() {
 43     return $this->string;
 44   }
 45   
 46   /**
 47    * Creates a new attribute layer
 48    *
 49    * @param string $attribute The name of the new attribute
 50    * @throws InvalidArgumentException if the attribute already exists
 51    */
 52   public function createAttribute($attribute) {
 53     if ($this->hasAttribute($attribute)) {
 54       throw new \InvalidArgumentException();
 55     }
 56     
 57     $this->attributes[$attribute] = array_fill(0, $this->length, false);
 58   }
 59   
 60   /**
 61    * Check if the given attribute exists
 62    *
 63    * @param string $attribute The name of the attribute to check
 64    * @return bool
 65    */
 66   public function hasAttribute($attribute) {
 67     return isset($this->attributes[$attribute]);
 68   }
 69   
 70   public function deleteAttribute($attribute) {
 71     if (isset($this->attributes[$attribute])) {
 72       unset($this->attributes[$attribute]);
 73     }
 74   }
 75   
 76   /**
 77    * Set given range of the string to an attribute and state
 78    *
 79    * @param int $from start offset
 80    * @param int $to end offset
 81    * @param string $attribute name of the attribute to be set
 82    * @param bool $state set state to true (default) or false
 83    */
 84   public function setRange($from, $to, $attribute, $state = true) {
 85     // Ensure correct range
 86     $from = min($from, $this->length);
 87     $from = max($from, 0);
 88     $to = min($to, $this->length);
 89     $to = max($to, 0);
 90     
 91     // Be kind and swap from and to if mixed up
 92     if ($from>$to) {
 93       list($from, $to) = [$to, $from];
 94     }
 95     
 96     // Create attribute if it does not exist
 97     if (!$this->hasAttribute($attribute)) {
 98       $this->createAttribute($attribute);
 99     }
100 
101     // Set attribute state for given range
102     $this->attributes[$attribute] = array_replace($this->attributes[$attribute], array_fill($from, $to-$from+1, $state));
103   }
104   
105   /**
106    * Set given length of the string to an attribute and state
107    *
108    * @param int $from start offset
109    * @param int $length length to be set
110    * @param string $attribute name of the attribute to be set
111    * @param bool $state set state to true (default) or false
112    */
113   public function setLength($from, $length, $attribute, $state = true) {
114     return $this->setRange($from, $from + $length - 1, $attribute, $state);
115   }
116   
117   /**
118    * Set parts of the string matching a given regex to an attribute and state
119    *
120    * @param string $pattern regex pattern
121    * @param string $attribute name of the attribute to be set
122    * @param bool $state set state to true (default) or false
123    * @return int number of matches
124    */
125   public function setPattern($pattern, $attribute, $state = true) {
126     if ($ret = preg_match_all($pattern, $this->string, $matches, PREG_OFFSET_CAPTURE)) {
127       foreach($matches[0] as $match)
128       {
129         $match[1] = $this->byteToCharOffset($match[1]);
130         $this->setRange($match[1], $match[1]+mb_strlen($match[0], "utf-8")-1, $attribute, $state);
131       }
132 
133       return $ret;
134     }
135   }
136   
137   /**
138    * Set given substring to an attribute and state
139    *
140    * @param string $substring the substring to search
141    * @param string $attribute name of the attribute to be set
142    * @param bool $all set first or all occurences of the substring
143    * @param bool $matchCase match or ignore case
144    * @param bool $state set state to true (default) or false
145    */
146   public function setSubstring($substring, $attribute, $all = true, $matchCase = true, $state = true) {
147     $offset = 0;
148     $length = mb_strlen($substring, "utf-8");
149     $func = $matchCase ? "mb_strpos" : "mb_stripos";
150     
151     while (false !== $pos = $func($this->string, $substring, $offset, "utf-8")) {
152       $this->setRange($pos, $pos + $length - 1, $attribute, $state);
153       if (!$all) {
154         return;
155       }
156       $offset = $pos + $length;
157     }
158   }
159   
160   /**
161    * Search inside the string for ranges with the given attribute
162    *
163    * @param string $attribute name of the attribute to search
164    * @param int $offset start offset
165    * @param bool $returnLength if true (default is false), return an array with position and length of the found range
166    * @param bool $state the state to look for (default is true)
167    * @param bool $strict perform strict comparison during search
168    * @return int|int[] either position or position and lenght in an array
169    */
170   public function searchAttribute($attribute, $offset = 0, $returnLength = false, $state = true, $strict = true) {
171     if (!$this->hasAttribute($attribute)) {
172       return false;
173     }
174     
175     $a = $this->attributes[$attribute];
176 
177     if ($offset) {
178       $a = array_slice($a, $offset, NULL, true);
179     }
180     
181     $pos = array_search($state, $a, $strict);
182     
183     if ($returnLength) {
184       if (false === $pos) {
185         return false;
186       }
187       
188       $a = array_slice($a, $pos - $offset);
189       $length = array_search(!$state, $a, $strict);
190       $length = $length ? $length : $this->length - $pos;
191 
192       return [$pos, $length];
193     } else {
194       return $pos;
195     }
196   }
197   
198   /**
199    * Check for given attribute at a offset
200    *
201    * @param string $attribute name of the attribute to check
202    * @param int $pos offset to check
203    * @return bool true if string has the attribute at the given position
204    */
205   public function is($attribute, $pos) {
206     return (isset($this->attributes[$attribute][$pos]) and $this->attributes[$attribute][$pos]);
207   }
208   
209   /**
210    * Return an array of substrings that have a given attribute
211    *
212    * @param string $attribute name of the attribute
213    * @param int $pos offset
214    * @param bool $state the state to look for (default is true)
215    * @param bool $strict perform strict comparison during search
216    * @return string[] array of strings with given attribute
217    */
218   public function substrings($attribute, $offset = 0, $state = true, $strict = true)
219   {
220     $substrings = [];
221     while (false !== $pl = $this->searchAttribute($attribute, $offset, true, $state, $strict))
222     {
223       //var_dump($pl);
224       $substring = mb_substr($this->string, $pl[0], $pl[1], "UTF-8");
225       $substrings[] = $substring;
226       $offset = $pl[0] + $pl[1];
227     }
228     
229     return $substrings;
230   }
231   
232   /**
233    * Return all parts of the string that have a given attribute as new string
234    *
235    * @param string $attribute name of the attribute
236    * @param int $pos offset
237    * @param bool $state the state to look for (default is true)
238    * @param bool $strict perform strict comparison during search
239    * @param string $glue glue that is inserted between the parts, default is nothing ("")
240    * @return string combined filtered string
241    */
242   public function filter($attribute, $offset = 0, $state = true, $strict = true, $glue = "")
243   {
244     return implode($glue, $this->substrings($attribute, $offset, $state, $strict));
245   }
246   
247   /**
248    * Return all attributes at a given offset
249    *
250    * @param int $pos offset
251    * @return string[] attributes at the given offset
252    */
253   public function attributesAt($pos) {
254     $attributes = [];
255 
256     foreach ($this->attributes as $attribute => &$map) {
257       if ($map[$pos]) {
258         $attributes[] = $attribute;
259       }
260     }
261 
262     return $attributes;
263   }
264   
265   /**
266    * Convert to HTML, using a given class to mark attribute spans
267    *
268    * @param string $tag HTML tag to use for the spans (defaults is "<span>")
269    * @param string $classPrefix Optional prefix used to convert the attribute names to class names
270    * @return string HTML
271    */
272   public function toHtml($tag = "span", $classPrefix = "") {
273     foreach($this->attributes as $attribute => $map) $state[$attribute] = false;
274 
275     $html = "";
276     $stack = [];
277     $lastPos = 0;
278 
279     for ($i=0; $i<$this->length; $i++)
280     {
281       foreach($this->attributes as $attribute => &$map)
282       {
283         if ($this->attributes[$attribute][$i] != $state[$attribute])
284         {
285           $state[$attribute] = $this->attributes[$attribute][$i];
286 
287           $html .= mb_substr($this->string, $lastPos, $i-$lastPos, "utf-8");
288           $lastPos = $i;
289 
290           if ($state[$attribute])
291           {
292             $html .= "<$tag class=\"$classPrefix$attribute\">";
293             $stack[] = $attribute;
294           }
295           else
296           {
297             // Close attribute span. If the top of the stack does not equal the attribute to be closed
298             // close, pop and stash it. This happens when span a ends in span b.
299             $stashed = [];
300             while($open = array_pop($stack))
301             {
302               $html .= "</$tag>";
303               if ($attribute == $open) {
304                 break;
305               }
306               $stashed[] = $open;
307             }
308             
309             // Now repopen the stashed spans and put them back on the stack.
310             foreach($stashed as $a) {
311               $stack[] = $a;
312               $html .= "<$tag class=\"$classPrefix$a\">";
313             }
314           }
315         }
316       }
317     }
318 
319     $html .= mb_substr($this->string, $lastPos, $this->length-$lastPos, 'utf-8');
320 
321     // Close all spans that remained open
322     $html .= str_repeat("</$tag>", count($stack));
323 
324     return $html;
325   }
326   
327   /**
328    * Combine attributes with the given boolean operation
329    *
330    * @param string $op one of or|xor|and|not
331    * @param string $attribute1 name of the first attribute
332    * @param string $attribute2 Name of the second attribute. Ignored for "not" operation.
333    * @param string $to optional name of the attribute to copy the result to
334    * @throws InvalidArgumentException if one of the attributes does not exist or an unkown operation is given
335    */
336   public function combineAttributes($op, $attribute1, $attribute2 = false, $to = false)
337   {
338     $to = isset($to) ? $to : $attribute1;
339     $op = strtolower($op);
340     
341     if ($op == "not") {
342       $attribute2 = $attribute1;
343     }
344 
345     if (!$this->hasAttribute($attribute1) or !$this->hasAttribute($attribute2)) {
346       throw new \InvalidArgumentException("Attribute does not exist");
347     }
348     
349     if (!isset($this->attributes[$to])) {
350       $this->attributes[$to] = []; // No need to init because array is created below
351     }
352     
353     // Switch outside the loops for speed
354     switch ($op) {
355       case 'or':
356         for($i = 0; $i < $this->length; $i++) {
357           $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] || $this->attributes[$attribute2][$i];
358         }
359       break;
360       
361       case 'xor':
362         for($i = 0; $i < $this->length; $i++) {
363           $this->attributes[$to][$i] = ($this->attributes[$attribute1][$i] xor $this->attributes[$attribute2][$i]);
364         }
365       break;
366 
367       case 'and':
368         for($i = 0; $i < $this->length; $i++) {
369           $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] && $this->attributes[$attribute2][$i];
370         }
371       break;
372 
373       case 'not':
374         for($i = 0; $i < $this->length; $i++) {
375           $this->attributes[$to][$i] = !$this->attributes[$attribute1][$i];
376         }
377       break;
378       
379       default:
380         throw new \InvalidArgumentException("Unknown operation");
381     }
382   }
383   
384   /**
385    * Convert attribute map to a visual string representation (e.g. for debugging)
386    *
387    * @param string $attribute name of the attribute
388    * @param string $true char to use for true state of attribute
389    * @param string $false char to use for false state of attribute
390    */
391   public function attributeToString($attribute, $true = "-", $false = " ") {
392     $map = $this->attributes[$attribute];
393     
394     return implode("", array_map(function($v) use ($true, $false) {
395       return $v ? $true : $false;
396     }, $map));
397   }
398   
399   /**
400    * Enable and fill cache for byte to char offset conversion
401    *
402    * May improve performance if setPattern is used extensively
403    */
404   public function enablebyteToCharCache() {
405     $this->byteToChar = [];
406     $char = 0;
407     for ($i = 0; $i < strlen($this->string); ) {
408       $char++;
409       $byte = $this->string[$i];
410       $cl = self::utf8CharLen($byte);
411       $i += $cl;
412       
413       $this->byteToChar[$i] = $char;
414     }
415   }
416   
417   protected function byteToCharOffset($boff) {
418     if (isset($this->byteToChar[$boff])) return $this->byteToChar[$boff];
419     
420     return $this->byteToChar[$boff] = self::byteToCharOffsetString($this->string, $boff);
421   }
422 
423   protected function charToByteOffset($char) {
424     $byte = strlen(mb_substr($this->string, 0, $char, "utf-8"));
425     if (!isset($this->byteToChar[$byte])) $this->byteToChar[$byte] = $char;
426     
427     return $byte;
428   }
429   
430   protected static function byteToCharOffsetString($string, $boff) {
431     $result = 0;
432     
433     for ($i = 0; $i < $boff; ) {
434       $result++;
435       $byte = $string[$i];
436       $cl = self::utf8CharLen($byte);
437       $i += $cl;
438     }
439     
440     return $result;
441   }
442   
443   protected static function utf8CharLen($byte) {
444     $base2 = str_pad(base_convert((string) ord($byte), 10, 2), 8, "0", STR_PAD_LEFT);
445     $p = strpos($base2, "0");
446     
447     if ($p == 0) {
448       return 1;
449     } elseif ($p <= 4) {
450       return $p;
451     } else {
452       throw new \InvalidArgumentException();
453     }
454   }
455   
456   // Countable interface
457   
458   /**
459    * Return string length (number of UTF-8 chars, not strlen())
460    *
461    * @return int string length
462    */
463   public function count() {
464     return $this->length;
465   }
466   
467   // ArrayAccess interface
468   
469   /**
470    * Check if the given offset exists in the string
471    *
472    * @param int $offset offset
473    * @return bool does the offset exist
474    */
475   public function offsetExists($offset) {
476     return $offest < $this->length;
477   }
478   
479   /**
480    * Get char at given offset
481    *
482    * Note: Since AttributedString is using UTF-8, the returned char may be longer than 1 byte!
483    *
484    * @param int $offset offset
485    * @return string character
486    */
487   public function offsetGet($offset) {
488     return mb_substr($this->string, $offset, 1, "utf-8");
489   }
490   
491   /**
492    * Not implemented since AttributedString is immutable
493    *
494    * @throws InvalidArgumentException always
495    */
496   public function offsetSet($offset, $value) {
497     throw new \InvalidArgumentException("AttributedString is immutable");
498   }
499   
500   /**
501    * Not implemented since AttributedString is immutable
502    *
503    * @throws InvalidArgumentException always
504    */
505   public function offsetUnset($offset) {
506     throw new \InvalidArgumentException("AttributedString is immutable");
507   }
508 }
509 
API documentation generated by ApiGen