Overview
  • Namespace
  • Class

Namespaces

  • apemsel
    • AttributedString

Classes

  • apemsel\AttributedString\AttributedString
  • apemsel\AttributedString\MutableAttributedString
  • apemsel\AttributedString\TokenizedAttributedString
  1 <?php
  2 namespace apemsel\AttributedString;
  3 
  4 /**
  5  * Basic class to work with attributed strings.
  6  *
  7  * Attributed strings are strings that can have multiple attributes per character of the string
  8  *
  9  * @author Adrian Pemsel <apemsel@gmail.com>
 10  */
 11 class AttributedString implements \Countable
 12 {
 13   protected $string;
 14   protected $attributes;
 15   protected $length;
 16   protected $byteToChar;
 17   
 18   /**
 19    * @param string|AttributedString $string Either a simple string or another AttributedString to init the AttributedString
 20    */
 21   public function __construct($string) {
 22     if (is_string($string)) {
 23       $this->string = $string;
 24       $this->length = mb_strlen($string, "utf-8");
 25     }
 26     elseif ($string instanceof AttributedString) {
 27       $this->string = $string->string;
 28       $this->attributes = $string->attributes;
 29       $this->lenght = $string->length;
 30       $this->byteToChar = $string->byteToChar;
 31     }
 32     else {
 33       throw new \InvalidArgumentException();
 34     }
 35   }
 36   
 37   /**
 38    * Returns the native string
 39    *
 40    * @return string The native string representation of the AttributedString without attributes
 41    */
 42   public function __toString() {
 43     return $this->string;
 44   }
 45   
 46   /**
 47    * Creates a new attribute layer
 48    *
 49    * @param string $attribute The name of the new attribute
 50    * @throws InvalidArgumentException if the attribute already exists
 51    */
 52   public function createAttribute($attribute) {
 53     if ($this->hasAttribute($attribute)) {
 54       throw new \InvalidArgumentException();
 55     }
 56     
 57     $this->attributes[$attribute] = array_fill(0, $this->length, false);
 58   }
 59   
 60   /**
 61    * Check if the given attribute exists
 62    *
 63    * @param string $attribute The name of the attribute to check
 64    * @return bool
 65    */
 66   public function hasAttribute($attribute) {
 67     return isset($this->attributes[$attribute]);
 68   }
 69   
 70   public function deleteAttribute($attribute) {
 71     if (isset($this->attributes[$attribute])) {
 72       unset($this->attributes[$attribute]);
 73     }
 74   }
 75   
 76   /**
 77    * Set given range of the string to an attribute and state
 78    *
 79    * @param int $from start offset
 80    * @param int $to end offset
 81    * @param string $attribute name of the attribute to be set
 82    * @param bool $state set state to true (default) or false
 83    */
 84   public function setRange($from, $to, $attribute, $state = true) {
 85     // Ensure correct range
 86     $from = min($from, $this->length);
 87     $from = max($from, 0);
 88     $to = min($to, $this->length);
 89     $to = max($to, 0);
 90     
 91     // Be kind and swap from and to if mixed up
 92     if ($from>$to) {
 93       list($from, $to) = [$to, $from];
 94     }
 95     
 96     // Create attribute if it does not exist
 97     if (!$this->hasAttribute($attribute)) {
 98       $this->createAttribute($attribute);
 99     }
100 
101     // Set attribute state for given range
102     $this->attributes[$attribute] = array_replace($this->attributes[$attribute], array_fill($from, $to-$from+1, $state));
103   }
104   
105   /**
106    * Set given length of the string to an attribute and state
107    *
108    * @param int $from start offset
109    * @param int $length length to be set
110    * @param string $attribute name of the attribute to be set
111    * @param bool $state set state to true (default) or false
112    */
113   public function setLength($from, $length, $attribute, $state = true) {
114     return $this->setRange($from, $from + $length - 1, $attribute, $state);
115   }
116   
117   /**
118    * Set parts of the string matching a given regex to an attribute and state
119    *
120    * @param string $pattern regex pattern
121    * @param string $attribute name of the attribute to be set
122    * @param bool $state set state to true (default) or false
123    * @return int number of matches
124    */
125   public function setPattern($pattern, $attribute, $state = true) {
126     if ($ret = preg_match_all($pattern, $this->string, $matches, PREG_OFFSET_CAPTURE)) {
127       foreach($matches[0] as $match)
128       {
129         $match[1] = $this->byteToCharOffset($match[1]);
130         $this->setRange($match[1], $match[1]+mb_strlen($match[0], "utf-8")-1, $attribute, $state);
131       }
132 
133       return $ret;
134     }
135   }
136   
137   /**
138    * Set given substring to an attribute and state
139    *
140    * @param string $substring the substring to search
141    * @param string $attribute name of the attribute to be set
142    * @param bool $all set first or all occurences of the substring
143    * @param bool $matchCase match or ignore case
144    * @param bool $state set state to true (default) or false
145    */
146   public function setSubstring($substring, $attribute, $all = true, $matchCase = true, $state = true) {
147     $offset = 0;
148     $length = mb_strlen($substring, "utf-8");
149     $func = $matchCase ? "mb_strpos" : "mb_stripos";
150     
151     while (false !== $pos = $func($this->string, $substring, $offset, "utf-8")) {
152       $this->setRange($pos, $pos + $length - 1, $attribute, $state);
153       if (!$all) {
154         return;
155       }
156       $offset = $pos + $length;
157     }
158   }
159   
160   /**
161    * Search inside the string for ranges with the given attribute
162    *
163    * @param string $attribute name of the attribute to search
164    * @param int $offset start offset
165    * @param bool $returnLength if true (default is false), return an array with position and length of the found range
166    * @param bool $state the state to look for (default is true)
167    * @param bool $strict perform strict comparison during search
168    * @return int|int[] either position or position and lenght in an array
169    */
170   public function searchAttribute($attribute, $offset = 0, $returnLength = false, $state = true, $strict = true) {
171     if (!$this->hasAttribute($attribute)) {
172       return false;
173     }
174     
175     $a = $this->attributes[$attribute];
176 
177     if ($offset) {
178       $a = array_slice($a, $offset, $this->length, true);
179     }
180     
181     $pos = array_search($state, $a, $strict);
182     
183     if ($returnLength) {
184       if (false === $pos) {
185         return false;
186       }
187       
188       $a = array_slice($a, $pos);
189       $length = array_search(!$state, $a, $strict);
190       $length = $length ? $length : $this->length - $pos;
191 
192       return [$pos, $length];
193     } else {
194       return $pos;
195     }
196   }
197   
198   /**
199    * Check for given attribute at a offset
200    *
201    * @param string $attribute name of the attribute to check
202    * @param int $pos offset to check
203    * @return bool true if string has the attribute at the given position
204    */
205   public function is($attribute, $pos) {
206     return (isset($this->attributes[$attribute][$pos]) and $this->attributes[$attribute][$pos]);
207   }
208   
209   /**
210    * Return all attributes at a given offset
211    *
212    * @param int $pos offset
213    * @return string[] attributes at the given offset
214    */
215   public function attributesAt($pos) {
216     $attributes = [];
217 
218     foreach ($this->attributes as $attribute => &$map) {
219       if ($map[$pos]) {
220         $attributes[] = $attribute;
221       }
222     }
223 
224     return $attributes;
225   }
226   
227   /**
228    * Convert to HTML, using a given class to mark attribute spans
229    *
230    * @param string $tag HTML tag to use for the spans (defaults is "<span>")
231    * @param string $classPrefix Optional prefix used to convert the attribute names to class names
232    * @return string HTML
233    * @throws Exception if the AttributedString cannot be converted to HTML due to improper nesting
234    */
235   public function toHtml($tag = "span", $classPrefix = "") {
236     foreach($this->attributes as $attribute => $map) $state[$attribute] = false;
237 
238     $html = "";
239     $stack = [];
240     $lastPos = 0;
241 
242     for ($i=0; $i<$this->length; $i++)
243     {
244       foreach($this->attributes as $attribute => &$map)
245       {
246         if ($this->attributes[$attribute][$i] != $state[$attribute])
247         {
248           $state[$attribute] = $this->attributes[$attribute][$i];
249 
250           $html .= mb_substr($this->string, $lastPos, $i-$lastPos, "utf-8");
251           $lastPos = $i;
252 
253           if ($state[$attribute])
254           {
255             $html .= "<$tag class=\"$classPrefix$attribute\">";
256             $stack[] = $attribute;
257           }
258           else
259           {
260             if ($attribute != array_pop($stack))
261             {
262               throw new Exception("Attributes are not properly nested for HTML conversion");
263             }
264             $html .= "</$tag>";
265           }
266         }
267       }
268     }
269 
270     $html .= mb_substr($this->string, $lastPos, $this->length-$lastPos, 'utf-8');
271 
272     // Close all spans that remained open
273     $html .= str_repeat("</$tag>", count($stack));
274 
275     return $html;
276   }
277   
278   /**
279    * Combine attributes with the given boolean operation
280    *
281    * @param string $op one of or|xor|and|not
282    * @param string $attribute1 name of the first attribute
283    * @param string $attribute2 Name of the second attribute. Ignored for "not" operation.
284    * @param string $to optional name of the attribute to copy the result to
285    * @throws InvalidArgumentException if one of the attributes does not exist or an unkown operation is given
286    */
287   public function combineAttributes($op, $attribute1, $attribute2 = false, $to = false)
288   {
289     $to = isset($to) ? $to : $attribute1;
290     $op = strtolower($op);
291     
292     if ($op == "not") {
293       $attribute2 = $attribute1;
294     }
295 
296     if (!$this->hasAttribute($attribute1) or !$this->hasAttribute($attribute2)) {
297       throw new \InvalidArgumentException("Attribute does not exist");
298     }
299     
300     if (!isset($this->attributes[$to])) {
301       $this->attributes[$to] = []; // No need to init because array is created below
302     }
303     
304     // Switch outside the loops for speed
305     switch ($op) {
306       case 'or':
307         for($i = 0; $i < $this->length; $i++) {
308           $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] || $this->attributes[$attribute2][$i];
309         }
310       break;
311       
312       case 'xor':
313         for($i = 0; $i < $this->length; $i++) {
314           $this->attributes[$to][$i] = ($this->attributes[$attribute1][$i] xor $this->attributes[$attribute2][$i]);
315         }
316       break;
317 
318       case 'and':
319         for($i = 0; $i < $this->length; $i++) {
320           $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] && $this->attributes[$attribute2][$i];
321         }
322       break;
323 
324       case 'not':
325         for($i = 0; $i < $this->length; $i++) {
326           $this->attributes[$to][$i] = !$this->attributes[$attribute1][$i];
327         }
328       break;
329       
330       default:
331         throw new \InvalidArgumentException("Unknown operation");
332     }
333   }
334   
335   /**
336    * Enable and fill cache for byte to char offset conversion
337    *
338    * May improve performance if setPattern is used extensively
339    */
340   public function enablebyteToCharCache() {
341     $this->byteToChar = [];
342     $char = 0;
343     for ($i = 0; $i < strlen($this->string); ) {
344       $char++;
345       $byte = $this->string[$i];
346       $cl = self::utf8CharLen($byte);
347       $i += $cl;
348       
349       $this->byteToChar[$i] = $char;
350     }
351   }
352   
353   protected function byteToCharOffset($boff) {
354     if (isset($this->byteToChar[$boff])) return $this->byteToChar[$boff];
355     
356     return $this->byteToChar[$boff] = self::byteToCharOffsetString($this->string, $boff);
357   }
358 
359   protected function charToByteOffset($char) {
360     $byte = strlen(mb_substr($this->string, 0, $char, "utf-8"));
361     if (!isset($this->byteToChar[$byte])) $this->byteToChar[$byte] = $char;
362     
363     return $byte;
364   }
365   
366   protected static function byteToCharOffsetString($string, $boff) {
367     $result = 0;
368     
369     for ($i = 0; $i < $boff; ) {
370       $result++;
371       $byte = $string[$i];
372       $cl = self::utf8CharLen($byte);
373       $i += $cl;
374     }
375     
376     return $result;
377   }
378   
379   protected static function utf8CharLen($byte) {
380     $base2 = str_pad(base_convert((string) ord($byte), 10, 2), 8, "0", STR_PAD_LEFT);
381     $p = strpos($base2, "0");
382     
383     if ($p == 0) {
384       return 1;
385     } elseif ($p <= 4) {
386       return $p;
387     } else {
388       throw new \InvalidArgumentException();
389     }
390   }
391   
392   /**
393    * Return string length (number of UTF-8 chars, not strlen())
394    *
395    * @return int string length
396    */
397   public function count() {
398     return $this->length;
399   }
400 }
401 
API documentation generated by ApiGen