root/tags/release_day_23/lib/markdown.php

Revision 23, 54.8 kB (checked in by fabien, 3 years ago)

day 9 modifications

  • Property svn:mime-type set to text/x-php
  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Id
Line 
1 <?php
2
3 #
4 # PHP Markdown Extra  -  A text-to-HTML conversion tool for web writers
5 #
6 # Copyright (c) 2004-2005 Michel Fortin 
7 # <http://www.michelf.com/projects/php-markdown/>
8 #
9 # Based on Markdown 
10 # Copyright (c) 2004-2005 John Gruber 
11 # <http://daringfireball.net/projects/markdown/>
12 #
13
14
#
# Version strings and default settings.
#
# The `global` statement only has an effect when this file is included from
# inside a function: it makes the variables listed below live in the global
# scope, where the rest of the file looks them up. Note that $md_wp_posts and
# $md_wp_comments are not part of that list.
#
global  $MarkdownPHPVersion, $MarkdownSyntaxVersion,
        $md_empty_element_suffix, $md_tab_width,
        $md_nested_brackets_depth, $md_nested_brackets,
        $md_escape_table, $md_backslash_escape_table,
        $md_list_level;

$MarkdownPHPVersion    = 'Extra 1.0'; # Mon 5 Sep 2005
$MarkdownSyntaxVersion = '1.0.1';     # Sun 12 Dec 2004


#
# Global default settings:
#
$md_empty_element_suffix = " />";     # Change to ">" for HTML output
$md_tab_width = 4;

#
# WordPress settings:
#
$md_wp_posts    = true; # Set to false to remove Markdown from posts.
$md_wp_comments = true; # Set to false to remove Markdown from comments.
36
37
# -- WordPress Plugin Interface -----------------------------------------------
/*
Plugin Name: PHP Markdown Extra
Plugin URI: http://www.michelf.com/projects/php-markdown/
Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
Version: Extra 1.0
Author: Michel Fortin
Author URI: http://www.michelf.com/
*/
#
# Everything below only runs when $wp_version is set. The helper functions
# are declared inside the conditionals, so they exist only when the
# corresponding setting ($md_wp_posts / $md_wp_comments) is enabled.
#
if (isset($wp_version)) {
    # More details about how it works here:
    # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

    # Post content and excerpts
    if ($md_wp_posts) {
        remove_filter('the_content', 'wpautop');
        remove_filter('the_excerpt', 'wpautop');
        add_filter('the_content',     'Markdown', 6);
        add_filter('get_the_excerpt', 'Markdown', 6);
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt',     'md_add_p');
        add_filter('the_excerpt_rss', 'md_strip_p');

        remove_filter('content_save_pre', 'balanceTags', 50);
        remove_filter('excerpt_save_pre', 'balanceTags', 50);
        add_filter('the_content',     'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);

        function md_add_p($text) {
        #
        # Wrap $text in a <p> element unless it is empty or already starts
        # with a <p> tag (compared case-insensitively).
        #
        # Returns the resulting string; empty input yields an empty string.
        #
            if (strlen($text) == 0) return '';
            # Look at the *first* three characters: the opening tag is at
            # the start of the text, not at its end.
            if (strcasecmp(substr($text, 0, 3), '<p>') == 0) return $text;
            return '<p>'.$text.'</p>';
        }
        function md_strip_p($t) {
        #
        # Remove every <p>, </p>, <P> and </P> tag from $t.
        #
            return preg_replace('{</?[pP]>}', '', $t);
        }
    }

    # Comments
    if ($md_wp_comments) {
        remove_filter('comment_text', 'wpautop');
        remove_filter('comment_text', 'make_clickable');
        add_filter('pre_comment_content', 'Markdown', 6);
        add_filter('pre_comment_content', 'md_hide_tags', 8);
        add_filter('pre_comment_content', 'md_show_tags', 12);
        add_filter('get_comment_text',    'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'md_strip_p', 7);

        # Maps each tag to its MD5 hash. md_hide_tags (priority 8) swaps the
        # tags for their hashes and md_show_tags (priority 12) swaps them
        # back, on the 'pre_comment_content' filter.
        global $md_hidden_tags;
        $md_hidden_tags = array(
            '<p>'   => md5('<p>'),   '</p>'   => md5('</p>'),
            '<pre>' => md5('<pre>'), '</pre>' => md5('</pre>'),
            '<ol>'  => md5('<ol>'),  '</ol>'  => md5('</ol>'),
            '<ul>'  => md5('<ul>'),  '</ul>'  => md5('</ul>'),
            '<li>'  => md5('<li>'),  '</li>'  => md5('</li>'),
            );

        function md_hide_tags($text) {
        #
        # Replace every tag listed in $md_hidden_tags by its hash.
        #
            global $md_hidden_tags;
            return str_replace(array_keys($md_hidden_tags),
                                array_values($md_hidden_tags), $text);
        }
        function md_show_tags($text) {
        #
        # Reverse of md_hide_tags: turn the hashes back into tags.
        #
            global $md_hidden_tags;
            return str_replace(array_values($md_hidden_tags),
                                array_keys($md_hidden_tags), $text);
        }
    }
}
106
107
# -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
#
# Describe this modifier for bBlog.
#
# Returns an associative array with the keys 'name', 'type', 'nicename',
# 'description', 'authors', 'licence', 'version' and 'help'. The version
# is read from the global $MarkdownPHPVersion.
#
    global $MarkdownPHPVersion;
    return array(
        'name'          => 'markdown',
        'type'          => 'modifier',
        'nicename'      => 'PHP Markdown Extra',
        'description'   => 'A text-to-HTML conversion tool for web writers',
        'authors'       => 'Michel Fortin and John Gruber',
        'licence'       => 'GPL',
        'version'       => $MarkdownPHPVersion,
        'help'          => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>'
    );
}
122
# -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
#
# Smarty modifier entry point: returns whatever Markdown($text) returns.
#
    return Markdown($text);
}
127
# -- Textile Compatibility Mode -----------------------------------------------
# Rename this file to "classTextile.php" and it can replace Textile anywhere.
#
# The Textile class below is only declared when the name of this file ends
# with "classTextile.php" (compared case-insensitively).
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    # Try to include PHP SmartyPants. Should be in the same directory.
    # The @ keeps a missing file from producing a warning; SmartyPants is
    # optional and its presence is checked with function_exists() below.
    @include_once 'smartypants.php';
    # Fake Textile class. It calls Markdown instead.
    class Textile {
        function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
        #
        # Run $text through Markdown when both $lite and $encode are empty
        # strings, then through SmartyPants when that function exists.
        # $noimage and $strict are accepted but not used.
        #
            if ($lite == '' && $encode == '')   $text = Markdown($text);
            if (function_exists('SmartyPants')) $text = SmartyPants($text);
            return $text;
        }
    }
}
142
143
144
#
# Globals:
#

# Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
$md_nested_brackets_depth = 6;
$md_nested_brackets =
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
    str_repeat('\])*', $md_nested_brackets_depth);

# Table of hash values for escaped characters:
$md_escape_table = array(
    "\\" => md5("\\"),
    "`" => md5("`"),
    "*" => md5("*"),
    "_" => md5("_"),
    "{" => md5("{"),
    "}" => md5("}"),
    "[" => md5("["),
    "]" => md5("]"),
    "(" => md5("("),
    ")" => md5(")"),
    ">" => md5(">"),
    "#" => md5("#"),
    "+" => md5("+"),
    "-" => md5("-"),
    "." => md5("."),
    "!" => md5("!"),
    ":" => md5(":"),
    "|" => md5("|"),
);
# Create an identical table but for backslash-escaped characters: each key is
# the character preceded by a backslash, each value is the same hash.
# Start from an explicit empty array instead of reading an undefined variable.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char)
    $md_backslash_escape_table["\\$key"] = $char;
181
182
183
function Markdown($text) {
#
# Main function. The order in which other subs are called here is
# essential. Link and image substitutions need to happen before
# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
# and <img> tags get encoded.
#
# Takes the Markdown source in $text and returns the converted text with a
# trailing newline appended.
#
    # Clear the global hashes. If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    global $md_urls, $md_titles, $md_html_blocks, $md_html_hashes;
    $md_urls = array();
    $md_titles = array();
    $md_html_blocks = array();
    $md_html_hashes = array();

    # Standardize line endings:
    #   DOS to Unix and Mac to Unix
    $text = str_replace(array("\r\n", "\r"), "\n", $text);

    # Make sure $text ends with a couple of newlines:
    $text .= "\n\n";

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Turn block-level HTML blocks into hash entries
    $text = _HashHTMLBlocks($text);

    # Strip any lines consisting only of spaces and tabs.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ \t]*\n+/ .
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Strip link definitions, store in hashes.
    $text = _StripLinkDefinitions($text);

    $text = _RunBlockGamut($text, FALSE);

    $text = _UnescapeSpecialChars($text);

    return $text . "\n";
}
229
230
function _StripLinkDefinitions($text) {
#
# Strips link definitions from text, stores the URLs and titles in
# the global hashes $md_urls and $md_titles (done by the callback below).
#
# Returns $text with every link definition removed.
#
    global $md_tab_width;
    $less_than_tab = $md_tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    $text = preg_replace_callback('{
                        ^[ ]{0,'.$less_than_tab.'}\[(.+)\]:    # id = $1
                          [ \t]*
                          \n?                # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>?            # url = $2
                          [ \t]*
                          \n?                # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s)            # lookbehind for whitespace
                            ["(]
                            (.+?)            # title = $3
                            [")]
                            [ \t]*
                        )?    # title is optional
                        (?:\n+|\Z)
        }xm',
        '_StripLinkDefinitions_callback',
        $text);
    return $text;
}
function _StripLinkDefinitions_callback($matches) {
#
# Callback for _StripLinkDefinitions. Records one link definition under its
# lowercased id: the URL goes through _EncodeAmpsAndAngles, the title (when
# present) has its double quotes replaced by &quot;.
#
    global $md_urls, $md_titles;
    $link_id = strtolower($matches[1]);
    $md_urls[$link_id] = _EncodeAmpsAndAngles($matches[2]);
    if (isset($matches[3]))
        $md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
    return ''; # String that will replace the block
}
270
271
function _HashHTMLBlocks($text) {
#
# Hashify HTML Blocks and "clean tags".
#
# We only want to do this for block-level HTML tags, such as headers,
# lists, and tables. That's because we still want to wrap <p>s around
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded.
#
# This works by calling _HashHTMLBlocks_InMarkdown, which then calls
# _HashHTMLBlocks_InHTML when it encounters block tags. When the markdown="1"
# attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
# _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
# These two functions are calling each other. It's recursive!
#
# Side effect: (re)assigns the global tag lists and $tag_match below on
# every call. Returns the processed text.
#
    global  $block_tags, $context_block_tags, $contain_span_tags,
            $clean_tags, $auto_close_tags;

    # Tags that are always treated as block tags:
    $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'form|fieldset|iframe|hr|legend';

    # Tags treated as block tags only if the opening tag is alone on its line:
    $context_block_tags = 'script|noscript|math|ins|del';

    # Tags where markdown="1" default to span mode:
    $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend';

    # Tags which must not have their contents modified, no matter where
    # they appear:
    $clean_tags = 'script|math';

    # Tags that do not need to be closed.
    $auto_close_tags = 'hr|img';

    # Regex to match any tag.
    global $tag_match;
    $tag_match =
        '{
            (                    # $2: Capture hole tag.
                </?                    # Any opening or closing tag.
                    [\w:$]+            # Tag name.
                    \s*                # Whitespace.
                    (?:
                        ".*?"        |    # Double quotes (can contain `>`)
                        \'.*?\'       |    # Single quotes (can contain `>`)
                        .+?                # Anything but quotes and `>`.
                    )*?
                >                    # End of tag.
            |
                <!--    .*?     -->    # HTML Comment
            |
                <\?     .*?     \?>    # Processing instruction
            |
                <!\[CDATA\[.*?\]\]>    # CData Block
            )
        }xs';

    #
    # Call the HTML-in-Markdown hasher. Only the processed text (first
    # element of the returned pair) is of interest here.
    #
    list($text, ) = _HashHTMLBlocks_InMarkdown($text);

    return $text;
}
function _HashHTMLBlocks_InMarkdown($text, $indent = 0,
                                    $enclosing_tag = '', $md_span = false)
{
#
# Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
#
# *   $indent is the number of spaces to be ignored when checking for code
#     blocks. This is important because if we don't take the indent into
#     account, something like this (which looks right) won't work as expected:
#
#     <div>
#         <div markdown="1">
#         Hello World.  <-- Is this a Markdown code block or text?
#         </div>  <-- Is this a Markdown code block or a real tag?
#     </div>
#
#     If you don't like this, just don't indent the tag on which
#     you apply the markdown="1" attribute.
#
# *   If $enclosing_tag is not empty, stops at the first unmatched closing
#     tag with that name. Nested tags supported.
#
# *   If $md_span is true, text inside must be treated as span. So any double
#     newline will be replaced by a single newline so that it does not create
#     paragraphs.
#
# Relies on the global tag lists ($block_tags, $context_block_tags,
# $clean_tags) that _HashHTMLBlocks assigns.
#
# Returns an array of that form: ( processed text , remaining text )
#
    global  $block_tags, $context_block_tags, $clean_tags, $auto_close_tags,
            $tag_match;

    if ($text === '') return array('', '');

    # Regex to check for the presence of newlines around a block tag.
    $newline_match_before = "/(?:^\n?|\n\n) *$/";
    $newline_match_after =
        '{
            ^                        # Start of text following the tag.
            (?:[ ]*<!--.*?-->)?        # Optional comment.
            [ ]*\n                    # Must be followed by newline.
        }xs';

    # Regex to match any tag.
    $block_tag_match =
        '{
            (                    # $2: Capture hole tag.
                </?                    # Any opening or closing tag.
                    (?:                # Tag name.
                        '.$block_tags.'            |
                        '.$context_block_tags.'    |
                        '.$clean_tags.'            |
                        (?!\s)'.$enclosing_tag.'
                    )
                    \s*                # Whitespace.
                    (?:
                        ".*?"        |    # Double quotes (can contain `>`)
                        \'.*?\'       |    # Single quotes (can contain `>`)
                        .+?                # Anything but quotes and `>`.
                    )*?
                >                    # End of tag.
            |
                <!--    .*?     -->    # HTML Comment
            |
                <\?     .*?     \?>    # Processing instruction
            |
                <!\[CDATA\[.*?\]\]>    # CData Block
            )
        }xs';


    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent
    # or loop until reaching the end of text if no parent tag specified.
    #
    do {
        #
        # Split the text using the first $block_tag_match pattern found.
        # Text before  pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($block_tag_match, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # If in Markdown span mode, replace any multiple newlines that would
        # trigger a new paragraph.
        if ($md_span) {
            $parts[0] = preg_replace('/\n\n/', "\n", $parts[0]);
        }

        $parsed .= $parts[0]; # Text before current tag.

        # If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Tag inside code block or span
        #
        if (# Find current paragraph
            preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
            (
            # Then match in it either a code block...
            preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                        '(?!\n)$/', $matches[1]) ||
            # ...or unbalanced code span markers. (the regex matches balanced)
            !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                         $matches[1])
            ))
        {
            # Tag is in code block or span and may not be a tag at all. So we
            # simply skip the first char (should be a `<`).
            # Square brackets are used for string offsets: the curly-brace
            # form is no longer accepted by current PHP versions.
            $parsed .= $tag[0];
            $text = substr($tag, 1) . $text; # Put back $tag minus first char.
        }
        #
        # Check for: Opening Block level tag or
        #            Opening Content Block tag (like ins and del)
        #               used as a block tag (tag is alone on its line).
        #
        else if (preg_match("{^<(?:$block_tags)\b}", $tag) ||
            (   preg_match("{^<(?:$context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
            )
        {
            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                _HashHTMLBlocks_InHTML($tag . $text,
                                    "_HashHTMLBlocks_HashBlock", TRUE);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: Clean tag (like script, math)
        #            HTML Comments, processing instructions.
        #
        else if (preg_match("{^<(?:$clean_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Need to parse tag and following text using the HTML parser.
            # (don't check for markdown attribute)
            list($block_text, $text) =
                _HashHTMLBlocks_InHTML($tag . $text,
                                    "_HashHTMLBlocks_HashClean", FALSE);

            $parsed .= $block_text;
        }
        #
        # Check for: Tag with same name as enclosing tag.
        #
        else if ($enclosing_tag !== '' &&
            # Same name as enclosing tag.
            preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
        {
            #
            # Increase/decrease nested tag count. A self-closing tag
            # (ending in "/>") leaves the depth unchanged.
            #
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                #
                # Going out of parent element. Clean up and break so we
                # return to the calling function.
                #
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
526 function _HashHTMLBlocks_InHTML($text, $hash_function, $md_attr) {
527 #
528 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
529 #
530 # *   Calls $hash_function to convert any blocks.
531 # *   Stops when the first opening tag closes.
532 # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
533 #     (it is not inside clean tags)
534 #
535 # Returns an array of that form: ( processed text , remaining text )
536 #
537     global $auto_close_tags, $contain_span_tags, $tag_match;
538     
539     if ($text === '') return array('', '');
540     
541     # Regex to match `markdown` attribute inside of a tag.
542     $markdown_attr_match = '
543         {
544             \s*            # Eat whitespace before the `markdown` attribute
545             markdown
546             \s*=\s*
547             (["\'])        # $1: quote delimiter       
548             (.*?)        # $2: attribute value
549             \1            # matching delimiter   
550         }xs';
551     
552     $original_text = $text;        # Save original text in case of failure.
553     
554     $depth        = 0;    # Current depth inside the tag tree.
555     $block_text    = "";    # Temporary text holder for current text.