| 273 | | # |
|---|
| 274 | | # Hashify HTML Blocks and "clean tags". |
|---|
| 275 | | # |
|---|
| 276 | | # We only want to do this for block-level HTML tags, such as headers, |
|---|
| 277 | | # lists, and tables. That's because we still want to wrap <p>s around |
|---|
| 278 | | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, |
|---|
| 279 | | # phrase emphasis, and spans. The list of tags we're looking for is |
|---|
| 280 | | # hard-coded. |
|---|
| 281 | | # |
|---|
| 282 | | # This works by calling _HashHTMLBlocks_InMarkdown, which then calls |
|---|
| 283 | | # _HashHTMLBlocks_InHTML when it encounters block tags. When the markdown="1" |
|---|
| 284 | | # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back |
|---|
| 285 | | # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. |
|---|
| 286 | | # These two functions are calling each other. It's recursive! |
|---|
| 287 | | # |
|---|
| 288 | | global $block_tags, $context_block_tags, $contain_span_tags, |
|---|
| 289 | | $clean_tags, $auto_close_tags; |
|---|
| 290 | | |
|---|
| 291 | | # Tags that are always treated as block tags: |
|---|
| 292 | | $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. |
|---|
| 293 | | 'form|fieldset|iframe|hr|legend'; |
|---|
| 294 | | |
|---|
| 295 | | # Tags treated as block tags only if the opening tag is alone on its line: |
|---|
| 296 | | $context_block_tags = 'script|noscript|math|ins|del'; |
|---|
| 297 | | |
|---|
| 298 | | # Tags where markdown="1" default to span mode: |
|---|
| 299 | | $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend'; |
|---|
| 300 | | |
|---|
| 301 | | # Tags which must not have their contents modified, no matter where |
|---|
| 302 | | # they appear: |
|---|
| 303 | | $clean_tags = 'script|math'; |
|---|
| 304 | | |
|---|
| 305 | | # Tags that do not need to be closed. |
|---|
| 306 | | $auto_close_tags = 'hr|img'; |
|---|
| 307 | | |
|---|
| 308 | | # Regex to match any tag. |
|---|
| 309 | | global $tag_match; |
|---|
| 310 | | $tag_match = |
|---|
| 311 | | '{ |
|---|
| 312 | | ( # $2: Capture hole tag. |
|---|
| 313 | | </? # Any opening or closing tag. |
|---|
| 314 | | [\w:$]+ # Tag name. |
|---|
| 315 | | \s* # Whitespace. |
|---|
| 316 | | (?: |
|---|
| 317 | | ".*?" | # Double quotes (can contain `>`) |
|---|
| 318 | | \'.*?\' | # Single quotes (can contain `>`) |
|---|
| 319 | | .+? # Anything but quotes and `>`. |
|---|
| 320 | | )*? |
|---|
| 321 | | > # End of tag. |
|---|
| 322 | | | |
|---|
| 323 | | <!-- .*? --> # HTML Comment |
|---|
| 324 | | | |
|---|
| 325 | | <\? .*? \?> # Processing instruction |
|---|
| 326 | | | |
|---|
| 327 | | <!\[CDATA\[.*?\]\]> # CData Block |
|---|
| 328 | | ) |
|---|
| 329 | | }xs'; |
|---|
| 330 | | |
|---|
| 331 | | # |
|---|
| 332 | | # Call the HTML-in-Markdown hasher. |
|---|
| 333 | | # |
|---|
| 334 | | list($text, ) = _HashHTMLBlocks_InMarkdown($text); |
|---|
| 335 | | |
|---|
| 336 | | return $text; |
|---|
| 337 | | } |
|---|
| 338 | | function _HashHTMLBlocks_InMarkdown($text, $indent = 0, |
|---|
| 339 | | $enclosing_tag = '', $md_span = false) |
|---|
| 340 | | { |
|---|
| 341 | | # |
|---|
| 342 | | # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. |
|---|
| 343 | | # |
|---|
| 344 | | # * $indent is the number of spaces to be ignored when checking for code |
|---|
| 345 | | # blocks. This is important because if we don't take the indent into |
|---|
| 346 | | # account, something like this (which looks right) won't work as expected: |
|---|
| 347 | | # |
|---|
| 348 | | # <div> |
|---|
| 349 | | # <div markdown="1"> |
|---|
| 350 | | # Hello World. <-- Is this a Markdown code block or text? |
|---|
| 351 | | # </div> <-- Is this a Markdown code block or a real tag? |
|---|
| 352 | | # <div> |
|---|
| 353 | | # |
|---|
| 354 | | # If you don't like this, just don't indent the tag on which |
|---|
| 355 | | # you apply the markdown="1" attribute. |
|---|
| 356 | | # |
|---|
| 357 | | # * If $enclosing_tag is not empty, stops at the first unmatched closing |
|---|
| 358 | | # tag with that name. Nested tags supported. |
|---|
| 359 | | # |
|---|
| 360 | | # * If $md_span is true, text inside must be treated as span. So any double |
|---|
| 361 | | # newline will be replaced by a single newline so that it does not create |
|---|
| 362 | | # paragraphs. |
|---|
| 363 | | # |
|---|
| 364 | | # Returns an array of that form: ( processed text , remaining text ) |
|---|
| 365 | | # |
|---|
| 366 | | global $block_tags, $context_block_tags, $clean_tags, $auto_close_tags, |
|---|
| 367 | | $tag_match; |
|---|
| 368 | | |
|---|
| 369 | | if ($text === '') return array('', ''); |
|---|
| 370 | | |
|---|
| 371 | | # Regex to check for the presence of newlines around a block tag. |
|---|
| 372 | | $newline_match_before = "/(?:^\n?|\n\n) *$/"; |
|---|
| 373 | | $newline_match_after = |
|---|
| 374 | | '{ |
|---|
| 375 | | ^ # Start of text following the tag. |
|---|
| 376 | | (?:[ ]*<!--.*?-->)? # Optional comment. |
|---|
| 377 | | [ ]*\n # Must be followed by newline. |
|---|
| 378 | | }xs'; |
|---|
| 379 | | |
|---|
| 380 | | # Regex to match any tag. |
|---|
| 381 | | $block_tag_match = |
|---|
| 382 | | '{ |
|---|
| 383 | | ( # $2: Capture hole tag. |
|---|
| 384 | | </? # Any opening or closing tag. |
|---|
| 385 | | (?: # Tag name. |
|---|
| 386 | | '.$block_tags.' | |
|---|
| 387 | | '.$context_block_tags.' | |
|---|
| 388 | | '.$clean_tags.' | |
|---|
| 389 | | (?!\s)'.$enclosing_tag.' |
|---|
| | 268 | global $md_tab_width; |
|---|
| | 269 | $less_than_tab = $md_tab_width - 1; |
|---|
| | 270 | |
|---|
| | 271 | # Hashify HTML blocks: |
|---|
| | 272 | # We only want to do this for block-level HTML tags, such as headers, |
|---|
| | 273 | # lists, and tables. That's because we still want to wrap <p>s around |
|---|
| | 274 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, |
|---|
| | 275 | # phrase emphasis, and spans. The list of tags we're looking for is |
|---|
| | 276 | # hard-coded: |
|---|
| | 277 | $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. |
|---|
| | 278 | 'script|noscript|form|fieldset|iframe|math|ins|del'; |
|---|
| | 279 | $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. |
|---|
| | 280 | 'script|noscript|form|fieldset|iframe|math'; |
|---|
| | 281 | |
|---|
| | 282 | # First, look for nested blocks, e.g.: |
|---|
| | 283 | # <div> |
|---|
| | 284 | # <div> |
|---|
| | 285 | # tags for inner block must be indented. |
|---|
| | 286 | # </div> |
|---|
| | 287 | # </div> |
|---|
| | 288 | # |
|---|
| | 289 | # The outermost tags must start at the left margin for this to match, and |
|---|
| | 290 | # the inner nested divs must be indented. |
|---|
| | 291 | # We need to do this before the next, more liberal match, because the next |
|---|
| | 292 | # match will start at the first `<div>` and stop at the first `</div>`. |
|---|
| | 293 | $text = preg_replace_callback("{ |
|---|
| | 294 | ( # save in $1 |
|---|
| | 295 | ^ # start of line (with /m) |
|---|
| | 296 | <($block_tags_a) # start tag = $2 |
|---|
| | 297 | \\b # word break |
|---|
| | 298 | (.*\\n)*? # any number of lines, minimally matching |
|---|
| | 299 | </\\2> # the matching end tag |
|---|
| | 300 | [ \\t]* # trailing spaces/tabs |
|---|
| | 301 | (?=\\n+|\\Z) # followed by a newline or end of document |
|---|
| | 302 | ) |
|---|
| | 303 | }xm", |
|---|
| | 304 | '_HashHTMLBlocks_callback', |
|---|
| | 305 | $text); |
|---|
| | 306 | |
|---|
| | 307 | # |
|---|
| | 308 | # Now match more liberally, simply from `\n<tag>` to `</tag>\n` |
|---|
| | 309 | # |
|---|
| | 310 | $text = preg_replace_callback("{ |
|---|
| | 311 | ( # save in $1 |
|---|
| | 312 | ^ # start of line (with /m) |
|---|
| | 313 | <($block_tags_b) # start tag = $2 |
|---|
| | 314 | \\b # word break |
|---|
| | 315 | (.*\\n)*? # any number of lines, minimally matching |
|---|
| | 316 | .*</\\2> # the matching end tag |
|---|
| | 317 | [ \\t]* # trailing spaces/tabs |
|---|
| | 318 | (?=\\n+|\\Z) # followed by a newline or end of document |
|---|
| | 319 | ) |
|---|
| | 320 | }xm", |
|---|
| | 321 | '_HashHTMLBlocks_callback', |
|---|
| | 322 | $text); |
|---|
| | 323 | |
|---|
| | 324 | # Special case just for <hr />. It was easier to make a special case than |
|---|
| | 325 | # to make the other regex more complicated. |
|---|
| | 326 | $text = preg_replace_callback('{ |
|---|
| | 327 | (?: |
|---|
| | 328 | (?<=\n\n) # Starting after a blank line |
|---|
| | 329 | | # or |
|---|
| | 330 | \A\n? # the beginning of the doc |
|---|
| | 331 | ) |
|---|
| | 332 | ( # save in $1 |
|---|
| | 333 | [ ]{0,'.$less_than_tab.'} |
|---|
| | 334 | <(hr) # start tag = $2 |
|---|
| | 335 | \b # word break |
|---|
| | 336 | ([^<>])*? # |
|---|
| | 337 | /?> # the matching end tag |
|---|
| | 338 | [ \t]* |
|---|
| | 339 | (?=\n{2,}|\Z) # followed by a blank line or end of document |
|---|
| | 340 | ) |
|---|
| | 341 | }x', |
|---|
| | 342 | '_HashHTMLBlocks_callback', |
|---|
| | 343 | $text); |
|---|
| | 344 | |
|---|
| | 345 | # Special case for standalone HTML comments: |
|---|
| | 346 | $text = preg_replace_callback('{ |
|---|
| | 347 | (?: |
|---|
| | 348 | (?<=\n\n) # Starting after a blank line |
|---|
| | 349 | | # or |
|---|
| | 350 | \A\n? # the beginning of the doc |
|---|
| | 351 | ) |
|---|
| | 352 | ( # save in $1 |
|---|
| | 353 | [ ]{0,'.$less_than_tab.'} |
|---|
| | 354 | (?s: |
|---|
| | 355 | <! |
|---|
| | 356 | (--.*?--\s*)+ |
|---|
| | 357 | > |
|---|
| 391 | | \s* # Whitespace. |
|---|
| 392 | | (?: |
|---|
| 393 | | ".*?" | # Double quotes (can contain `>`) |
|---|
| 394 | | \'.*?\' | # Single quotes (can contain `>`) |
|---|
| 395 | | .+? # Anything but quotes and `>`. |
|---|
| 396 | | )*? |
|---|
| 397 | | > # End of tag. |
|---|
| 398 | | | |
|---|
| 399 | | <!-- .*? --> # HTML Comment |
|---|
| 400 | | | |
|---|
| 401 | | <\? .*? \?> # Processing instruction |
|---|
| 402 | | | |
|---|
| 403 | | <!\[CDATA\[.*?\]\]> # CData Block |
|---|
| 404 | | ) |
|---|
| 405 | | }xs'; |
|---|
| 406 | | |
|---|
| 407 | | |
|---|
| 408 | | $depth = 0; # Current depth inside the tag tree. |
|---|
| 409 | | $parsed = ""; # Parsed text that will be returned. |
|---|
| 410 | | |
|---|
| 411 | | # |
|---|
| 412 | | # Loop through every tag until we find the closing tag of the parent |
|---|
| 413 | | # or loop until reaching the end of text if no parent tag specified. |
|---|
| 414 | | # |
|---|
| 415 | | do { |
|---|
| 416 | | # |
|---|
| 417 | | # Split the text using the first $tag_match pattern found. |
|---|
| 418 | | # Text before pattern will be first in the array, text after |
|---|
| 419 | | # pattern will be at the end, and between will be any catches made |
|---|
| 420 | | # by the pattern. |
|---|
| 421 | | # |
|---|
| 422 | | $parts = preg_split($block_tag_match, $text, 2, |
|---|
| 423 | | PREG_SPLIT_DELIM_CAPTURE); |
|---|
| 424 | | |
|---|
| 425 | | # If in Markdown span mode, replace any multiple newlines that would |
|---|
| 426 | | # trigger a new paragraph. |
|---|
| 427 | | if ($md_span) { |
|---|
| 428 | | $parts[0] = preg_replace('/\n\n/', "\n", $parts[0]); |
|---|
| 429 | | } |
|---|
| 430 | | |
|---|
| 431 | | $parsed .= $parts[0]; # Text before current tag. |
|---|
| 432 | | |
|---|
| 433 | | # If end of $text has been reached. Stop loop. |
|---|
| 434 | | if (count($parts) < 3) { |
|---|
| 435 | | $text = ""; |
|---|
| 436 | | break; |
|---|
| 437 | | } |
|---|
| 438 | | |
|---|
| 439 | | $tag = $parts[1]; # Tag to handle. |
|---|
| 440 | | $text = $parts[2]; # Remaining text after current tag. |
|---|
| 441 | | |
|---|
| 442 | | # |
|---|
| 443 | | # Check for: Tag inside code block or span |
|---|
| 444 | | # |
|---|
| 445 | | if (# Find current paragraph |
|---|
| 446 | | preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) && |
|---|
| 447 | | ( |
|---|
| 448 | | # Then match in it either a code block... |
|---|
| 449 | | preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'. |
|---|
| 450 | | '(?!\n)$/', $matches[1], $x) || |
|---|
| 451 | | # ...or unbalanced code span markers. (the regex matches balanced) |
|---|
| 452 | | !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s', |
|---|
| 453 | | $matches[1]) |
|---|
| 454 | | )) |
|---|
| 455 | | { |
|---|
| 456 | | # Tag is in code block or span and may not be a tag at all. So we |
|---|
| 457 | | # simply skip the first char (should be a `<`). |
|---|
| 458 | | $parsed .= $tag{0}; |
|---|
| 459 | | $text = substr($tag, 1) . $text; # Put back $tag minus first char. |
|---|
| 460 | | } |
|---|
| 461 | | # |
|---|
| 462 | | # Check for: Opening Block level tag or |
|---|
| 463 | | # Opening Content Block tag (like ins and del) |
|---|
| 464 | | # used as a block tag (tag is alone on its line). |
|---|
| 465 | | # |
|---|
| 466 | | else if (preg_match("{^<(?:$block_tags)\b}", $tag) || |
|---|
| 467 | | ( preg_match("{^<(?:$context_block_tags)\b}", $tag) && |
|---|
| 468 | | preg_match($newline_match_before, $parsed) && |
|---|
| 469 | | preg_match($newline_match_after, $text) ) |
|---|
| 470 | | ) |
|---|
| 471 | | { |
|---|
| 472 | | # Need to parse tag and following text using the HTML parser. |
|---|
| 473 | | list($block_text, $text) = |
|---|
| 474 | | _HashHTMLBlocks_InHTML($tag . $text, |
|---|
| 475 | | "_HashHTMLBlocks_HashBlock", TRUE); |
|---|
| 476 | | |
|---|
| 477 | | # Make sure it stays outside of any paragraph by adding newlines. |
|---|
| 478 | | $parsed .= "\n\n$block_text\n\n"; |
|---|
| 479 | | } |
|---|
| 480 | | # |
|---|
| 481 | | # Check for: Clean tag (like script, math) |
|---|
| 482 | | # HTML Comments, processing instructions. |
|---|
| 483 | | # |
|---|
| 484 | | else if (preg_match("{^<(?:$clean_tags)\b}", $tag) || |
|---|
| 485 | | $tag{1} == '!' || $tag{1} == '?') |
|---|
| 486 | | { |
|---|
| 487 | | # Need to parse tag and following text using the HTML parser. |
|---|
| 488 | | # (don't check for markdown attribute) |
|---|
| 489 | | list($block_text, $text) = |
|---|
| 490 | | _HashHTMLBlocks_InHTML($tag . $text, |
|---|
| 491 | | "_HashHTMLBlocks_HashClean", FALSE); |
|---|
| 492 | | |
|---|
| 493 | | $parsed .= $block_text; |
|---|
| 494 | | } |
|---|
| 495 | | # |
|---|
| 496 | | # Check for: Tag with same name as enclosing tag. |
|---|
| 497 | | # |
|---|
| 498 | | else if ($enclosing_tag !== '' && |
|---|
| 499 | | # Same name as enclosing tag. |
|---|
| 500 | | preg_match("{^</?(?:$enclosing_tag)\b}", $tag)) |
|---|
| 501 | | { |
|---|
| 502 | | # |
|---|
| 503 | | # Increase/decrease nested tag count. |
|---|
| 504 | | # |
|---|
| 505 | | if ($tag{1} == '/') $depth--; |
|---|
| 506 | | else if ($tag{strlen($tag)-2} != '/') $depth++; |
|---|
| 507 | | |
|---|
| 508 | | if ($depth < 0) { |
|---|
| 509 | | # |
|---|
| 510 | | # Going out of parent element. Clean up and break so we |
|---|
| 511 | | # return to the calling function. |
|---|
| 512 | | # |
|---|
| 513 | | $text = $tag . $text; |
|---|
| 514 | | break; |
|---|
| 515 | | } |
|---|
| 516 | | |
|---|
| 517 | | $parsed .= $tag; |
|---|
| 518 | | } |
|---|
| 519 | | else { |
|---|
| 520 | | $parsed .= $tag; |
|---|
| 521 | | } |
|---|
| 522 | | } while ($depth >= 0); |
|---|
| 523 | | |
|---|
| 524 | | return array($parsed, $text); |
|---|
| 525 | | } |
|---|
function _HashHTMLBlocks_InHTML($text, $hash_function, $md_attr) {
#
# Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
#
# *	The name of the base tag is read from the very start of $text.
# *	Calls $hash_function to convert any blocks. It is invoked as
#	$hash_function($block_text, $span_mode) when a tag carrying a
#	`markdown` attribute ends the current block, and as
#	$hash_function($block_text) for the last block.
# *	Stops when the first opening tag closes.
# *	$md_attr indicates whether the `markdown="1"` attribute is allowed
#	(it is not inside clean tags).
#
# Reads the globals $auto_close_tags, $contain_span_tags and $tag_match.
#
# Returns an array of that form: ( processed text , remaining text )
#
	global $auto_close_tags, $contain_span_tags, $tag_match;

	if ($text === '') return array('', '');

	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_match = '
		{
			\s*				# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(["\'])			# $1: quote delimiter
			(.*?)			# $2: attribute value
			\1				# matching delimiter
		}xs';

	$original_text = $text;		# Save original text in case of failure.

	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag. It stays empty when $text does not
	# start with a named tag (e.g. a comment or processing instruction), so
	# the variable is always defined.
	#
	$base_tag_name = '';
	if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
		$base_tag_name = $matches[1];

	# Tag names may contain `$` or `:`; quote the name so these characters
	# are matched literally instead of being read as regex syntax.
	$base_tag_match = '{^</?' . preg_quote($base_tag_name) . '\b}';

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_match pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}

		$block_text .= $parts[0];	# Text before current tag.
		$tag         = $parts[1];	# Tag to handle.
		$text        = $parts[2];	# Remaining text after current tag.

		#
		# Check for: Auto-close tag (like <hr/>)
		#			 Comments and Processing Instructions.
		#
		if (preg_match("{^</?(?:$auto_close_tags)\b}", $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name matches the base tag's.
			#
			if (preg_match($base_tag_match, $tag)) {
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag)-2] != '/')	$depth++;	# Not self-closed.
			}

			#
			# Check for `markdown="1"` attribute and handle it.
			#
			if ($md_attr &&
				preg_match($markdown_attr_match, $tag, $attr_matches) &&
				preg_match('/^(?:1|block|span)$/', $attr_matches[2]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_match, '', $tag);

				# Check if text inside this tag must be parsed in span mode:
				# either explicitly requested, or implied by the tag name
				# unless block mode was explicitly requested.
				$md_mode = $attr_matches[2];
				$span_mode = ($md_mode == 'span') ||
					($md_mode != 'block' &&
					 (bool) preg_match("{^<(?:$contain_span_tags)\b}", $tag));

				# Calculate indent before tag.
				preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
				$indent = strlen($matches[1]);

				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $hash_function($block_text, $span_mode);

				# Get enclosing tag name for the ParseMarkdown function.
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name = $matches[1];

				# Parse the content using the HTML-in-Markdown parser.
				list ($block_text, $text)
					= _HashHTMLBlocks_InMarkdown($text, $indent,
												 $tag_name, $span_mode);

				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
											   $block_text);
				}

				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";

				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}

	} while ($depth > 0);

	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $hash_function($block_text);

	return array($parsed, $text);
}
|---|
| 667 | | function _HashHTMLBlocks_HashBlock($text) { |
|---|
| 668 | | global $md_html_hashes, $md_html_blocks; |
|---|
| | 359 | [ \t]* |
|---|
| | 360 | (?=\n{2,}|\Z) # followed by a blank line or end of document |
|---|
| | 361 | ) |
|---|
| | 362 | }x', |
|---|
| | 363 | '_HashHTMLBlocks_callback', |
|---|
| | 364 | $text); |
|---|
| | 365 | |
|---|
| | 366 | return $text; |
|---|
| | 367 | } |
|---|
| | 368 | function _HashHTMLBlocks_callback($matches) { |
|---|
| | 369 | global $md_html_blocks; |
|---|
| | 370 | $text = $matches[1]; |
|---|