1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12:
/** Version of the Texy! library. */
define('TEXY_VERSION', '2.1');


/*
 * Environment checks executed once, when this file is loaded.
 */

// When mbstring overloads the string functions (bit 2 of mbstring.func_overload)
// and the internal encoding name starts with 'U', the internal encoding is
// switched to 'pass' and a warning is raised.
// The settings are read through ini_get() and mb_internal_encoding() instead of
// mb_get_info('func_overload'): that type was removed together with function
// overloading in PHP 8.0 and asking for it throws a ValueError there.
if (extension_loaded('mbstring')) {
    if (((int) ini_get('mbstring.func_overload') & 2) && substr(mb_internal_encoding(), 0, 1) === 'U') {
        mb_internal_encoding('pass');
        trigger_error("Texy: mb_internal_encoding changed to 'pass'", E_USER_WARNING);
    }
}

// Refuse to run in Zend Engine 1 compatibility mode; the directive may be given
// either as a number or as one of the words on/true/yes.
if (ini_get('zend.ze1_compatibility_mode') % 256 ||
    preg_match('#on$|true$|yes$#iA', ini_get('zend.ze1_compatibility_mode'))) {
    throw new RuntimeException("Texy cannot run with zend.ze1_compatibility_mode enabled.");
}
30:
31:
32: 33: require_once dirname(__FILE__) . '/libs/RegExp.Patterns.php';
34: require_once dirname(__FILE__) . '/libs/TexyObject.php';
35: require_once dirname(__FILE__) . '/libs/TexyHtml.php';
36: require_once dirname(__FILE__) . '/libs/TexyModifier.php';
37: require_once dirname(__FILE__) . '/libs/TexyModule.php';
38: require_once dirname(__FILE__) . '/libs/TexyParser.php';
39: require_once dirname(__FILE__) . '/libs/TexyUtf.php';
40: require_once dirname(__FILE__) . '/libs/TexyConfigurator.php';
41: require_once dirname(__FILE__) . '/libs/TexyHandlerInvocation.php';
42: require_once dirname(__FILE__) . '/modules/TexyParagraphModule.php';
43: require_once dirname(__FILE__) . '/modules/TexyBlockModule.php';
44: require_once dirname(__FILE__) . '/modules/TexyHeadingModule.php';
45: require_once dirname(__FILE__) . '/modules/TexyHorizLineModule.php';
46: require_once dirname(__FILE__) . '/modules/TexyHtmlModule.php';
47: require_once dirname(__FILE__) . '/modules/TexyFigureModule.php';
48: require_once dirname(__FILE__) . '/modules/TexyImageModule.php';
49: require_once dirname(__FILE__) . '/modules/TexyLinkModule.php';
50: require_once dirname(__FILE__) . '/modules/TexyListModule.php';
51: require_once dirname(__FILE__) . '/modules/TexyLongWordsModule.php';
52: require_once dirname(__FILE__) . '/modules/TexyPhraseModule.php';
53: require_once dirname(__FILE__) . '/modules/TexyBlockQuoteModule.php';
54: require_once dirname(__FILE__) . '/modules/TexyScriptModule.php';
55: require_once dirname(__FILE__) . '/modules/TexyEmoticonModule.php';
56: require_once dirname(__FILE__) . '/modules/TexyTableModule.php';
57: require_once dirname(__FILE__) . '/modules/TexyTypographyModule.php';
58: require_once dirname(__FILE__) . '/modules/TexyHtmlOutputModule.php';
59:
60:
61:
62: 63: 64:
/*
 * Fallback declarations of the SPL exception classes.
 * Each class is declared only when no class of that name exists yet; the
 * second argument FALSE keeps class_exists() from invoking the autoloader.
 */
if (!class_exists('LogicException', FALSE)) {
    class LogicException extends Exception {}
}

if (!class_exists('InvalidArgumentException', FALSE)) {
    class InvalidArgumentException extends LogicException {}
}

if (!class_exists('RuntimeException', FALSE)) {
    class RuntimeException extends Exception {}
}

if (!class_exists('UnexpectedValueException', FALSE)) {
    class UnexpectedValueException extends RuntimeException {}
}
80:
81:
82:
83: 84: 85:
/*
 * Additional exception classes, declared only when not already present
 * (autoloading is not triggered by the check).
 * In this file NotSupportedException is thrown by Texy::__clone() and
 * InvalidStateException by Texy::process() and Texy::toText();
 * MemberAccessException is not thrown here.
 */
if (!class_exists('NotSupportedException', FALSE)) {
    class NotSupportedException extends LogicException {}
}

if (!class_exists('MemberAccessException', FALSE)) {
    class MemberAccessException extends LogicException {}
}

if (!class_exists('InvalidStateException', FALSE)) {
    class InvalidStateException extends RuntimeException {}
}
97:
98:
99:
100: 101: 102:
/*
 * Global constants carrying the same values as the class constants
 * Texy::ALL, Texy::NONE and Texy::CONTENT_* declared further below.
 * NOTE(review): presumably kept for backward compatibility - confirm before removing.
 */
define('TEXY_ALL', TRUE);
define('TEXY_NONE', FALSE);
define('TEXY_CONTENT_MARKUP', "\x17");
define('TEXY_CONTENT_REPLACED', "\x16");
define('TEXY_CONTENT_TEXTUAL', "\x15");
define('TEXY_CONTENT_BLOCK', "\x14");
109:
110:
111:
112:
113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123:
124: class Texy extends TexyObject
125: {
/** "Everything" / "nothing" switches; default of $allowedClasses and $allowedStyles, value stored in $allowedTags */
const ALL = TRUE;
const NONE = FALSE;

/** Library version and revision identification */
const VERSION = TEXY_VERSION;
const REVISION = '9338a11 released on 2010-05-20';

/**
 * Content-type markers: single control characters that delimit the keys
 * produced by protect(); CONTENT_BLOCK is also the separator used by
 * stringToHtml() to split a string into chunks.
 */
const CONTENT_MARKUP = "\x17";
const CONTENT_REPLACED = "\x16";
const CONTENT_TEXTUAL = "\x15";
const CONTENT_BLOCK = "\x14";

/** NOTE(review): presumably the keys of $urlSchemeFilters / the $type argument of checkURL() - confirm in the modules */
const FILTER_ANCHOR = 'anchor';
const FILTER_IMAGE = 'image';

/** Bit flag contained in every XHTML/XML output mode; tested by setOutputMode() */
const XML = 2;

/** Output modes accepted by setOutputMode(); the XHTML variants include the XML bit */
const HTML4_TRANSITIONAL = 0;
const HTML4_STRICT = 1;
const HTML5 = 4;
const XHTML1_TRANSITIONAL = 2; // HTML4_TRANSITIONAL | XML
const XHTML1_STRICT = 3; // HTML4_STRICT | XML
const XHTML5 = 6; // HTML5 | XML
155:
/** @var string  encoding of the input text and of the produced output (handed to the TexyUtf conversions) */
public $encoding = 'utf-8';

/**
 * @var array  name => bool; a name is set to TRUE by the register*() methods when
 *             not present yet, and process() / stringToHtml() skip names whose value is empty
 */
public $allowed = array();

/** @var array  tag name => Texy::ALL, rebuilt by setOutputMode() for every element of the DTD */
public $allowedTags;

/** @var bool|array  Texy::ALL, Texy::NONE or an array; an array is flipped into $_classes by process() */
public $allowedClasses = Texy::ALL;

/** @var bool|array  Texy::ALL, Texy::NONE or an array; an array is flipped into $_styles by process() */
public $allowedStyles = Texy::ALL;

/** @var int  tab stop width used by process() when expanding tabs (forced to an integer >= 1) */
public $tabWidth = 8;

/** @var bool  not read in this file; presumably controls e-mail obfuscation - TODO confirm in the link module */
public $obfuscateEmail = TRUE;

/** @var array|NULL  type => regular expression a URL with a scheme must match, see checkURL() */
public $urlSchemeFilters = NULL;

/** @var bool  not read in this file; presumably tells the parser to join consecutive lines - TODO confirm */
public $mergeLines = TRUE;

/** @var array  not written in this file; presumably filled by the modules during processing - TODO confirm */
public $summary = array(
    'images' => array(),
    'links' => array(),
    'preload' => array(),
);

/** @var string  not used in this file; presumably collects generated CSS - TODO confirm */
public $styleSheet = '';

/** @var array  alignment name => value (all NULL by default); presumably CSS class names - TODO confirm */
public $alignClasses = array(
    'left' => NULL,
    'right' => NULL,
    'center' => NULL,
    'justify' => NULL,
    'top' => NULL,
    'middle' => NULL,
    'bottom' => NULL,
);

/** @var bool  when TRUE, process() removes soft hyphens (bytes C2 AD) from the input */
public $removeSoftHyphens = TRUE;

/**
 * @var mixed  when truthy, process() appends the "by Texy2!" HTML comment;
 *             the value 'once' is switched to FALSE after the first use
 */
public static $advertisingNotice = 'once';

/** @var string  not read in this file; presumably the element used for paragraphs without text - TODO confirm */
public $nontextParagraph = 'div';

/** @var TexyScriptModule  created by loadModules() */
public $scriptModule;

/** @var TexyParagraphModule  created by loadModules() */
public $paragraphModule;

/** @var TexyHtmlModule  created by loadModules() */
public $htmlModule;

/** @var TexyImageModule  created by loadModules() */
public $imageModule;

/** @var TexyLinkModule  created by loadModules() */
public $linkModule;

/** @var TexyPhraseModule  created by loadModules() */
public $phraseModule;

/** @var TexyEmoticonModule  created by loadModules() */
public $emoticonModule;

/** @var TexyBlockModule  created by loadModules() */
public $blockModule;

/** @var TexyHeadingModule  created by loadModules() */
public $headingModule;

/** @var TexyHorizLineModule  created by loadModules() */
public $horizLineModule;

/** @var TexyBlockQuoteModule  created by loadModules() */
public $blockQuoteModule;

/** @var TexyListModule  created by loadModules() */
public $listModule;

/** @var TexyTableModule  created by loadModules() */
public $tableModule;

/** @var TexyFigureModule  created by loadModules() */
public $figureModule;

/** @var TexyTypographyModule  created by loadModules() */
public $typographyModule;

/** @var TexyLongWordsModule  created by loadModules() */
public $longWordsModule;

/** @var TexyHtmlOutputModule  created by loadModules() */
public $htmlOutputModule;


/**
 * Line patterns: name => array('handler', 'pattern', 'again'), see registerLinePattern().
 * $_linePatterns is the copy prepared by process() without the names disabled in $allowed.
 */
private $linePatterns = array();
private $_linePatterns;

/**
 * Block patterns: name => array('handler', 'pattern'), see registerBlockPattern().
 * $_blockPatterns is the copy prepared by process() without the names disabled in $allowed.
 */
private $blockPatterns = array();
private $_blockPatterns;

/** @var array  name => callback, see registerPostLine() */
private $postHandlers = array();

/** @var TexyHtml  root element built by the last process() call */
private $DOM;

/** @var array  placeholder key => protected content, see protect() and unProtect() */
private $marks = array();

/** Working copies of $allowedClasses / $allowedStyles prepared by process() (arrays are flipped) */
public $_classes, $_styles;

/** @var bool  TRUE while process() is running; guards against nested calls */
private $processing;

/** @var array  event name => list of callbacks, see addHandler() */
private $handlers = array();

/** @var array  DTD of the current output mode, set by setOutputMode() */
public $dtd;

/** @var array  output mode => DTD, shared by all instances */
private static $dtdCache;

/** @var int  current output mode, one of the mode constants */
private $mode;


/** @var bool|NULL  when not NULL, the constructor picks XHTML1_STRICT (truthy) or XHTML1_TRANSITIONAL */
public static $strictDTD;
/** @var TexyHtmlOutputModule  reference to $htmlOutputModule, bound in the constructor */
public $cleaner;
/** Not used in this file. NOTE(review): presumably a legacy switch superseded by setOutputMode() - confirm */
public $xhtml;
320:
321:
322:
/**
 * Builds a ready-to-use processor: loads the modules, selects the output
 * mode, binds $cleaner to the HTML output module and registers the
 * predefined link references 'texy', 'google' and 'wikipedia'.
 */
public function __construct()
{
    $this->loadModules();

    // XHTML 1.0 Transitional unless the static $strictDTD switch holds a truthy value
    $initialMode = self::$strictDTD ? self::XHTML1_STRICT : self::XHTML1_TRANSITIONAL;
    $this->setOutputMode($initialMode);

    // $cleaner is a reference to the same variable as $htmlOutputModule
    $this->cleaner = & $this->htmlOutputModule;

    // reference to the project home page, with title and label
    $homepage = new TexyLink('http://texy.info/');
    $homepage->modifier->title = 'The best text -> HTML converter and formatter';
    $homepage->label = 'Texy!';
    $this->linkModule->addReference('texy', $homepage);

    // plain references whose URL contains a %s placeholder
    $searchReferences = array(
        'google' => 'http://www.google.com/search?q=%s',
        'wikipedia' => 'http://en.wikipedia.org/wiki/Special:Search?search=%s',
    );
    foreach ($searchReferences as $refName => $url) {
        $this->linkModule->addReference($refName, new TexyLink($url));
    }
}
350:
351:
352:
353: 354: 355: 356: 357:
/**
 * Selects the output mode: loads (or takes from the static cache) the
 * matching DTD, tells TexyHtml whether XHTML syntax is in effect and resets
 * $allowedTags so that every element of the DTD is allowed.
 *
 * @param  int  $mode  one of the HTML4_*, HTML5, XHTML1_* or XHTML5 constants
 * @return void
 * @throws InvalidArgumentException  when $mode is not one of those constants (compared strictly)
 */
public function setOutputMode($mode)
{
    $supportedModes = array(
        self::HTML4_TRANSITIONAL,
        self::HTML4_STRICT,
        self::HTML5,
        self::XHTML1_TRANSITIONAL,
        self::XHTML1_STRICT,
        self::XHTML5,
    );
    if (!in_array($mode, $supportedModes, TRUE)) {
        throw new InvalidArgumentException("Invalid mode.");
    }

    if (!isset(self::$dtdCache[$mode])) {
        // the included script runs in this scope and leaves its result in $dtd
        // NOTE(review): presumably it reads the local $mode - do not rename either variable
        require dirname(__FILE__) . '/libs/DTD.php';
        self::$dtdCache[$mode] = $dtd;
    }

    $this->mode = $mode;
    $this->dtd = self::$dtdCache[$mode];

    // XHTML syntax applies to the modes carrying the XML bit
    TexyHtml::$xhtml = (bool) ($mode & self::XML);

    // allow every element the DTD knows
    $this->allowedTags = array();
    foreach (array_keys($this->dtd) as $tag) {
        $this->allowedTags[$tag] = self::ALL;
    }
}
380:
381:
382:
383: 384: 385: 386:
/**
 * Returns the output mode stored by the last setOutputMode() call.
 *
 * @return int
 */
public function getOutputMode()
{
    return $this->mode;
}
391:
392:
393:
394: 395: 396: 397:
/**
 * Instantiates all modules and stores them in the public *Module properties.
 * Every module receives this Texy object as its constructor argument; the
 * order of creation is the order of the list below.
 *
 * @return void
 */
protected function loadModules()
{
    $moduleClasses = array(
        'scriptModule' => 'TexyScriptModule',
        'htmlModule' => 'TexyHtmlModule',
        'imageModule' => 'TexyImageModule',
        'phraseModule' => 'TexyPhraseModule',
        'linkModule' => 'TexyLinkModule',
        'emoticonModule' => 'TexyEmoticonModule',

        'paragraphModule' => 'TexyParagraphModule',
        'blockModule' => 'TexyBlockModule',
        'figureModule' => 'TexyFigureModule',
        'horizLineModule' => 'TexyHorizLineModule',
        'blockQuoteModule' => 'TexyBlockQuoteModule',
        'tableModule' => 'TexyTableModule',
        'headingModule' => 'TexyHeadingModule',
        'listModule' => 'TexyListModule',

        'typographyModule' => 'TexyTypographyModule',
        'longWordsModule' => 'TexyLongWordsModule',
        'htmlOutputModule' => 'TexyHtmlOutputModule',
    );

    foreach ($moduleClasses as $property => $class) {
        $this->$property = new $class($this);
    }
}
423:
424:
425:
/**
 * Registers a line pattern under the given name (an existing entry with the
 * same name is replaced) and enables the name in $allowed when it has no
 * setting there yet.
 *
 * @param  callback     $handler    callback stored with the pattern
 * @param  string       $pattern    regular expression
 * @param  string       $name       pattern name
 * @param  string|NULL  $againTest  stored under the 'again' key; presumably a pre-test used by the line parser - TODO confirm
 * @return void
 * @throws InvalidArgumentException  when $handler is not callable
 */
final public function registerLinePattern($handler, $pattern, $name, $againTest = NULL)
{
    if (!is_callable($handler)) {
        // syntax-only check: yields a printable name and tells a well-formed
        // but missing callback apart from a malformed one
        $wellFormed = is_callable($handler, TRUE, $textual);
        $reason = $wellFormed ? 'callable.' : 'valid PHP callback.';
        throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
    }

    if (!isset($this->allowed[$name])) {
        $this->allowed[$name] = TRUE;
    }

    $definition = array();
    $definition['handler'] = $handler;
    $definition['pattern'] = $pattern;
    $definition['again'] = $againTest;
    $this->linePatterns[$name] = $definition;
}
441:
442:
443:
/**
 * Registers a block pattern under the given name (an existing entry with the
 * same name is replaced) and enables the name in $allowed when it has no
 * setting there yet. The letter 'm' is appended to the pattern string, so the
 * regular expression is used with the multi-line modifier.
 *
 * @param  callback  $handler  callback stored with the pattern
 * @param  string    $pattern  regular expression including delimiters
 * @param  string    $name     pattern name
 * @return void
 * @throws InvalidArgumentException  when $handler is not callable
 */
final public function registerBlockPattern($handler, $pattern, $name)
{
    if (!is_callable($handler)) {
        // syntax-only check: yields a printable name and tells a well-formed
        // but missing callback apart from a malformed one
        $wellFormed = is_callable($handler, TRUE, $textual);
        $reason = $wellFormed ? 'callable.' : 'valid PHP callback.';
        throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
    }

    if (!isset($this->allowed[$name])) {
        $this->allowed[$name] = TRUE;
    }

    $definition = array();
    $definition['handler'] = $handler;
    $definition['pattern'] = $pattern . 'm';
    $this->blockPatterns[$name] = $definition;
}
459:
460:
461:
/**
 * Registers a post-line handler under the given name; stringToHtml() calls
 * the enabled handlers on the text chunks. The name is enabled in $allowed
 * when it has no setting there yet.
 *
 * @param  callback  $handler  callback receiving a string and returning the processed string
 * @param  string    $name     handler name
 * @return void
 * @throws InvalidArgumentException  when $handler is not callable
 */
final public function registerPostLine($handler, $name)
{
    if (!is_callable($handler)) {
        // syntax-only check: yields a printable name and tells a well-formed
        // but missing callback apart from a malformed one
        $wellFormed = is_callable($handler, TRUE, $textual);
        $reason = $wellFormed ? 'callable.' : 'valid PHP callback.';
        throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
    }

    if (!isset($this->allowed[$name])) {
        $this->allowed[$name] = TRUE;
    }

    $this->postHandlers[$name] = $handler;
}
473:
474:
475:
476: 477: 478: 479: 480: 481: 482:
/**
 * Converts a Texy source text to (X)HTML.
 *
 * Steps, in this order: the protect marks are reset, the class/style filters
 * are prepared, the text is converted to UTF-8, soft hyphens are optionally
 * removed, the text is normalized and tabs are expanded, the 'beforeParse'
 * handlers run, the registered patterns are filtered by $allowed, the text is
 * parsed into a DOM, the 'afterParse' handlers run, the DOM is serialized,
 * the notice comment is optionally appended and the result is passed through
 * TexyUtf::utf2html().
 *
 * @param  string  $text        source text in the encoding given by $encoding
 * @param  bool    $singleLine  TRUE parses the text as a single line, FALSE as a block
 * @return string
 * @throws InvalidStateException  when called while this object is already processing
 */
public function process($text, $singleLine = FALSE)
{
    if ($this->processing) {
        throw new InvalidStateException('Processing is in progress yet.');
    }

    // initialization
    $this->marks = array();
    $this->processing = TRUE;

    try {
        // speed-up: arrays are flipped so that lookups can be done by key
        $this->_classes = is_array($this->allowedClasses)
            ? array_flip($this->allowedClasses)
            : $this->allowedClasses;

        $this->_styles = is_array($this->allowedStyles)
            ? array_flip($this->allowedStyles)
            : $this->allowedStyles;

        // convert to UTF-8
        $text = TexyUtf::toUtf($text, $this->encoding);

        if ($this->removeSoftHyphens) {
            $text = str_replace("\xC2\xAD", '', $text);
        }

        // standardize line endings and spaces
        $text = self::normalize($text);

        // replace tabs with spaces; one tab per line is expanded in each pass
        $this->tabWidth = max(1, (int) $this->tabWidth);
        while (strpos($text, "\t") !== FALSE) {
            $text = preg_replace_callback('#^(.*)\t#mU', array($this, 'tabCb'), $text);
        }

        // user handlers may modify the text, hence the reference
        $this->invokeHandlers('beforeParse', array($this, & $text, $singleLine));

        // select the patterns that are currently allowed
        $this->_linePatterns = $this->linePatterns;
        $this->_blockPatterns = $this->blockPatterns;
        foreach ($this->_linePatterns as $name => $foo) {
            if (empty($this->allowed[$name])) unset($this->_linePatterns[$name]);
        }
        foreach ($this->_blockPatterns as $name => $foo) {
            if (empty($this->allowed[$name])) unset($this->_blockPatterns[$name]);
        }

        // parse the text into the DOM
        $this->DOM = TexyHtml::el();
        if ($singleLine) {
            $this->DOM->parseLine($this, $text);
        } else {
            $this->DOM->parseBlock($this, $text);
        }

        $this->invokeHandlers('afterParse', array($this, $this->DOM, $singleLine));

        // converts the DOM to a final HTML string
        $html = $this->DOM->toHtml($this);

    } catch (Exception $e) {
        // release the guard, otherwise a single failure (e.g. an exception
        // thrown by a user handler) would make every later call on this
        // object fail with InvalidStateException
        $this->processing = FALSE;
        throw $e;
    }

    // this notice should remain
    if (self::$advertisingNotice) {
        $html .= "\n<!-- by Texy2! -->";
        if (self::$advertisingNotice === 'once') {
            self::$advertisingNotice = FALSE;
        }
    }

    $this->processing = FALSE;

    return TexyUtf::utf2html($html, $this->encoding);
}
555:
556:
557:
558: 559: 560: 561: 562: 563:
/**
 * Converts a single line of Texy source; shortcut for process($text, TRUE).
 *
 * @param  string  $text  source text
 * @return string
 */
public function processLine($text)
{
    return $this->process($text, TRUE);
}
568:
569:
570:
571: 572: 573: 574: 575:
/**
 * Applies only the typographic post-processing to a text: the typography
 * module and, when 'longwords' is enabled in $allowed, the long-words module.
 *
 * @param  string  $text  text in the encoding given by $encoding
 * @return string  result of TexyUtf::utf2html() for that encoding
 */
public function processTypo($text)
{
    // convert to UTF-8, then standardize line endings and spaces
    $text = self::normalize(TexyUtf::toUtf($text, $this->encoding));

    $typography = $this->typographyModule;
    $typography->beforeParse($this, $text);
    $text = $typography->postLine($text, TRUE);

    $longWordsEnabled = !empty($this->allowed['longwords']);
    if ($longWordsEnabled) {
        $text = $this->longWordsModule->postLine($text);
    }

    return TexyUtf::utf2html($text, $this->encoding);
}
593:
594:
595:
596: 597: 598: 599:
/**
 * Returns the plain-text form of the DOM built by process(), converted from
 * UTF-8 to the encoding given by $encoding.
 *
 * @return string
 * @throws InvalidStateException  when there is no DOM yet
 */
public function toText()
{
    if ($this->DOM) {
        $plain = $this->DOM->toText($this);
        return TexyUtf::utfTo($plain, $this->encoding);
    }

    throw new InvalidStateException('Call $texy->process() first.');
}
608:
609:
610:
611: 612: 613: 614:
/**
 * Converts the internal string representation to the final HTML code.
 *
 * Entities are decoded, every enabled post-line handler is applied to the
 * even-numbered chunks between CONTENT_BLOCK delimiters, the text is escaped
 * again, protect marks are replaced by their content, the 'postProcess'
 * handlers run and frozen spaces are restored.
 *
 * @param  string  $s
 * @return string
 */
final public function stringToHtml($s)
{
    // decode HTML entities to UTF-8
    $s = self::unescapeHtml($s);

    // line post-processing; chunks at odd positions are left untouched
    $chunks = explode(self::CONTENT_BLOCK, $s);
    $chunkCount = count($chunks);
    foreach ($this->postHandlers as $name => $handler) {
        if (!empty($this->allowed[$name])) {
            for ($i = 0; $i < $chunkCount; $i += 2) {
                if ($chunks[$i] !== '') {
                    $chunks[$i] = call_user_func($handler, $chunks[$i]);
                }
            }
        }
    }
    $s = implode(self::CONTENT_BLOCK, $chunks);

    // encode < > &
    $s = self::escapeHtml($s);

    // replace protected marks
    $s = $this->unProtect($s);

    // handlers receive the string by reference and may modify it
    $this->invokeHandlers('postProcess', array($this, & $s));

    // unfreeze spaces
    return self::unfreezeSpaces($s);
}
646:
647:
648:
649: 650: 651: 652:
/**
 * Converts the internal string representation to plain text.
 *
 * The string is rendered by stringToHtml() with line wrapping of the HTML
 * output module switched off for the duration of the call; then script and
 * style elements and all tags are removed, long runs of blank lines are
 * reduced to one blank line, entities are decoded, soft hyphens are dropped
 * and non-breaking spaces become ordinary spaces.
 *
 * @param  string  $s
 * @return string
 */
final public function stringToText($s)
{
    // render without line wrapping, then restore the previous setting
    $previousWrap = $this->htmlOutputModule->lineWrap;
    $this->htmlOutputModule->lineWrap = FALSE;
    $html = $this->stringToHtml( $s );
    $this->htmlOutputModule->lineWrap = $previousWrap;

    // remove script/style elements including content, then all remaining tags
    $text = strip_tags(preg_replace('#<(script|style)(.*)</\\1>#Uis', '', $html));
    $text = preg_replace('#\n\s*\n\s*\n[\n\s]*\n#', "\n\n", $text);

    // entities -> characters
    $text = self::unescapeHtml($text);

    $replacements = array(
        "\xC2\xAD" => '', // soft hyphen
        "\xC2\xA0" => ' ', // non-breaking space
    );
    return strtr($text, $replacements);
}
676:
677:
678:
679: 680: 681: 682: 683: 684: 685:
/**
 * Appends a callback to the list of handlers of the given event.
 *
 * @param  string    $event     event name
 * @param  callback  $callback
 * @return void
 * @throws InvalidArgumentException  when $callback is not callable
 */
final public function addHandler($event, $callback)
{
    if (is_callable($callback)) {
        $this->handlers[$event][] = $callback;
        return;
    }

    // syntax-only check: yields a printable name and tells a well-formed
    // but missing callback apart from a malformed one
    $wellFormed = is_callable($callback, TRUE, $textual);
    $reason = $wellFormed ? 'callable.' : 'valid PHP callback.';
    throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
}
695:
696:
697:
698: 699: 700: 701: 702: 703: 704: 705:
/**
 * Runs the handlers of an event through a TexyHandlerInvocation and returns
 * whatever its proceed() call returns.
 *
 * @param  string  $event   event name
 * @param  mixed   $parser  passed on to the invocation
 * @param  array   $args    passed on to the invocation
 * @return mixed   FALSE when the event has no handlers, otherwise the result of proceed()
 */
final public function invokeAroundHandlers($event, $parser, $args)
{
    if (isset($this->handlers[$event])) {
        $chain = new TexyHandlerInvocation($this->handlers[$event], $parser, $args);
        $result = $chain->proceed();
        $chain->free();
        return $result;
    }

    return FALSE;
}
715:
716:
717:
718: 719: 720: 721: 722: 723: 724:
/**
 * Calls every handler registered for the event, in registration order, with
 * the given arguments; return values are ignored.
 *
 * @param  string  $event  event name
 * @param  array   $args   arguments for the handlers (elements may be references)
 * @return void
 */
final public function invokeHandlers($event, $args)
{
    if (isset($this->handlers[$event])) {
        foreach ($this->handlers[$event] as $callback) {
            call_user_func_array($callback, $args);
        }
    }
}
733:
734:
735:
736: 737: 738: 739: 740: 741:
/**
 * Replaces space, tab, CR and LF by the control characters \x01 .. \x04;
 * unfreezeSpaces() is the inverse operation.
 *
 * @param  string  $s
 * @return string
 */
final public static function freezeSpaces($s)
{
    static $frozen = array(
        ' ' => "\x01",
        "\t" => "\x02",
        "\r" => "\x03",
        "\n" => "\x04",
    );
    return strtr($s, $frozen);
}
746:
747:
748:
749: 750: 751: 752: 753:
/**
 * Turns the control characters \x01 .. \x04 back into space, tab, CR and LF;
 * inverse of freezeSpaces().
 *
 * @param  string  $s
 * @return string
 */
final public static function unfreezeSpaces($s)
{
    static $thawed = array(
        "\x01" => ' ',
        "\x02" => "\t",
        "\x03" => "\r",
        "\x04" => "\n",
    );
    return strtr($s, $thawed);
}
758:
759:
760:
761: 762: 763: 764: 765:
/**
 * Standardizes a text: line endings become LF, control characters other
 * than tab and LF are removed, trailing tabs and spaces are stripped from
 * every line and leading/trailing empty lines are cut off.
 *
 * @param  string  $s
 * @return string
 */
final public static function normalize($s)
{
    // CRLF (DOS) and lone CR (Mac) -> LF; the longer sequence takes precedence
    $unified = strtr($s, array("\r\n" => "\n", "\r" => "\n"));

    // remove special chars; leave \t + \n
    $cleaned = preg_replace('#[\x00-\x08\x0B-\x1F]+#', '', $unified);

    // right trim of every line
    $trimmed = preg_replace("#[\t ]+$#m", '', $cleaned);

    // leading and trailing blank lines
    return trim($trimmed, "\n");
}
783:
784:
785:
786: 787: 788: 789: 790: 791:
/**
 * Converts a string to a web-safe form: the text is transliterated by
 * TexyUtf::utf2ascii() and lower-cased, every run of characters other than
 * a-z, 0-9 and those listed in $charlist is replaced by a single dash, and
 * dashes at both ends are removed.
 *
 * @param  string       $s         UTF-8 text (as expected by TexyUtf::utf2ascii())
 * @param  string|NULL  $charlist  additional characters to keep
 * @return string
 */
final public static function webalize($s, $charlist = NULL)
{
    $s = TexyUtf::utf2ascii($s);
    $s = strtolower($s);
    // the cast keeps the default NULL away from preg_quote(), which rejects
    // NULL with a deprecation notice on PHP 8.1 and newer
    $s = preg_replace('#[^a-z0-9' . preg_quote((string) $charlist, '#') . ']+#', '-', $s);
    $s = trim($s, '-');
    return $s;
}
800:
801:
802:
803: 804: 805: 806: 807: 808:
/**
 * Escapes the three characters that are special in HTML text: ampersand,
 * less-than sign and greater-than sign are replaced by their named entities.
 * Quotes are left untouched. unescapeHtml() is the counterpart.
 *
 * The previous implementation replaced each character by itself and thus
 * returned its input unchanged.
 *
 * @param  string  $s
 * @return string
 */
final public static function escapeHtml($s)
{
    // ENT_NOQUOTES limits the conversion to the ampersand and the two angle
    // brackets. A single-byte charset is named on purpose: it makes the
    // function work byte-wise, so UTF-8 text (whose multi-byte sequences never
    // contain these three ASCII bytes) and the internal control characters
    // pass through unchanged and are never rejected as invalid input.
    return htmlspecialchars($s, ENT_NOQUOTES, 'ISO-8859-1');
}
813:
814:
815:
816: 817: 818: 819: 820:
/**
 * Decodes HTML entities, including both kinds of quotes, to UTF-8 characters.
 * A string without any ampersand is returned as it is.
 *
 * @param  string  $s
 * @return string
 */
final public static function unescapeHtml($s)
{
    $hasEntityStart = strpos($s, '&') !== FALSE;
    return $hasEntityStart ? html_entity_decode($s, ENT_QUOTES, 'UTF-8') : $s;
}
826:
827:
828:
829: 830: 831: 832: 833:
/**
 * Removes the indentation of a text block: after cutting leading and
 * trailing line breaks, the number of spaces at the very beginning is
 * measured and up to that many leading spaces are removed from every line.
 *
 * @param  string  $s
 * @return string
 */
final public static function outdent($s)
{
    $block = trim($s, "\n");
    $spaces = strspn($block, ' ');

    if (!$spaces) {
        return $block;
    }
    return preg_replace("#^ {1,$spaces}#m", '', $block);
}
841:
842:
843:
844: 845: 846: 847: 848: 849:
/**
 * Stores a piece of content in the marks table and returns the placeholder
 * key that stands for it until unProtect() is called.
 *
 * The key consists of the content-type character, the number of marks stored
 * so far written in octal with the digits 0-7 mapped to the control
 * characters \x18 .. \x1F, and the content-type character again.
 *
 * @param  string  $child        content to protect; an empty string yields an empty string
 * @param  string  $contentType  content-type marker, e.g. one of the CONTENT_* constants
 * @return string  placeholder key
 */
final public function protect($child, $contentType)
{
    if ($child==='') {
        return '';
    }

    $ordinal = decoct(count($this->marks));
    $encoded = strtr($ordinal, '01234567', "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F");
    $placeholder = $contentType . $encoded . $contentType;

    $this->marks[$placeholder] = $child;

    return $placeholder;
}
862:
863:
864:
/**
 * Replaces every placeholder key created by protect() with the content
 * stored for it.
 *
 * @param  string  $html
 * @return string
 */
final public function unProtect($html)
{
    return strtr($html, $this->marks);
}
869:
870:
871:
872: 873: 874: 875: 876: 877:
/**
 * Checks a URL against the scheme filter configured for the given type.
 * A URL is rejected only when a filter exists for the type, the URL starts
 * with a scheme (as defined by TEXY_URLSCHEME) and it does not match the
 * filter.
 *
 * @param  string  $URL
 * @param  string  $type  key into $urlSchemeFilters
 * @return bool    TRUE when the URL is acceptable
 */
final public function checkURL($URL, $type)
{
    // no filter configured for this type
    if (empty($this->urlSchemeFilters[$type])) {
        return TRUE;
    }

    // URLs without a scheme are not filtered
    if (!preg_match('#'.TEXY_URLSCHEME.'#A', $URL)) {
        return TRUE;
    }

    return (bool) preg_match($this->urlSchemeFilters[$type], $URL);
}
888:
889:
890:
891: 892: 893: 894: 895:
/**
 * Tells whether a URL is relative: TRUE unless it starts with a scheme
 * (as defined by TEXY_URLSCHEME) or with one of the characters '#', '/', '?'.
 *
 * @param  string  $URL
 * @return bool
 */
final public static function isRelative($URL)
{
    // the A modifier anchors the match at the start of the string
    return !preg_match('#'.TEXY_URLSCHEME.'|[\#/?]#A', $URL);
}
901:
902:
903:
904: 905: 906: 907: 908: 909:
/**
 * Puts a root in front of a relative URL. The URL is returned unchanged when
 * the root loosely equals NULL or the URL is not relative; otherwise the root
 * (without trailing slashes and backslashes), a slash and the URL are joined.
 *
 * @param  string  $URL
 * @param  string  $root
 * @return string
 */
final public static function prependRoot($URL, $root)
{
    if ($root == NULL) {
        return $URL;
    }
    if (!self::isRelative($URL)) {
        return $URL;
    }

    $base = rtrim($root, '/\\');
    return $base . '/' . $URL;
}
915:
916:
917:
/**
 * Returns the line patterns prepared by the last process() call, i.e. the
 * registered patterns without those disabled in $allowed.
 *
 * @return array|NULL  NULL before the first process() call
 */
final public function getLinePatterns()
{
    return $this->_linePatterns;
}
922:
923:
924:
/**
 * Returns the block patterns prepared by the last process() call, i.e. the
 * registered patterns without those disabled in $allowed.
 *
 * @return array|NULL  NULL before the first process() call
 */
final public function getBlockPatterns()
{
    return $this->_blockPatterns;
}
929:
930:
931:
/**
 * Returns the root element created by the last process() call.
 *
 * @return TexyHtml|NULL  NULL before the first process() call
 */
final public function getDOM()
{
    return $this->DOM;
}
936:
937:
938:
/**
 * Callback of the tab expansion in process(): replaces the first tab of a
 * line by as many spaces as are needed to reach the next tab stop.
 *
 * The column is counted in characters rather than bytes, so that a line with
 * multi-byte UTF-8 characters in front of the tab is aligned like a pure
 * ASCII one.
 *
 * @param  array  $m  regexp match; $m[1] is the line content in front of the tab
 * @return string
 */
private function tabCb($m)
{
    // bytes 0x80-0xBF only continue a UTF-8 sequence; dropping them leaves
    // exactly one byte per character
    // (assumes $m[1] is UTF-8, as process() converts the text first)
    $column = strlen(preg_replace('#[\x80-\xBF]#', '', $m[1]));

    return $m[1] . str_repeat(' ', $this->tabWidth - $column % $this->tabWidth);
}
943:
944:
945:
946: 947: 948:
/**
 * On PHP older than 5.3 sets every property of this object to NULL;
 * on newer versions it does nothing.
 * NOTE(review): presumably breaks the circular references to the modules so
 * that memory can be reclaimed without a cycle collector - confirm.
 *
 * @return void
 */
final public function free()
{
    if (version_compare(PHP_VERSION , '5.3') >= 0) {
        return;
    }

    foreach (get_object_vars($this) as $property => $unused) {
        $this->$property = NULL;
    }
}
957:
958:
959:
/**
 * Cloning a Texy object is not supported.
 *
 * @throws NotSupportedException  always
 */
final public function __clone()
{
    throw new NotSupportedException('Clone is not supported.');
}
964:
965: }
966: