Skip to content

Commit

Permalink
Merge pull request #181 from mattwiebe/markdown-preserve
Browse files Browse the repository at this point in the history
Markdown: preserve all data in code blocks
  • Loading branch information
blobaugh committed Feb 8, 2014
2 parents a4b0957 + f13ca4d commit afd22d7
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 12 deletions.
61 changes: 55 additions & 6 deletions _inc/lib/markdown/gfm.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
* @author Matt Wiebe <[email protected]>
* @link https://github.com/evansolomon/wp-github-flavored-markdown-comments
*
* Add a few extras from GitHub's Markdown implementation. Must be used
* in a WordPress environment if the $preserve_shortcodes member is set to true,
* which will be auto-detected initially on __construct()
* Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment.
*/

class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {
Expand Down Expand Up @@ -102,6 +100,51 @@ public function transform( $text ) {
return $text;
}

/**
 * HTML-encode the interior of fenced code blocks so their contents survive
 * later filtering (e.g. KSES stripping) untouched.
 *
 * Both backtick (```) and tilde (~~~) fences are handled; each match is
 * handed to do_codeblock_preserve().
 *
 * @param string $text Markdown/HTML content
 * @return string Markdown/HTML content with escaped code blocks
 */
public function codeblock_preserve( $text ) {
	$escape_callback = array( $this, 'do_codeblock_preserve' );
	$fence_patterns  = array(
		"/^(`{3})([^`\n]+)?\n([^`~]+)(`{3})/m",
		"/^(~{3})([^~\n]+)?\n([^~~]+)(~{3})/m",
	);

	// Backtick fences first, then tilde fences, each pass feeding the next.
	foreach ( $fence_patterns as $pattern ) {
		$text = preg_replace_callback( $pattern, $escape_callback, $text );
	}

	return $text;
}

/**
 * Regex callback used by codeblock_preserve().
 *
 * Expects the capture groups produced by the fence patterns:
 * [1] opening fence, [2] optional language/class, [3] block body, [4] closing fence.
 *
 * @param array $matches Regex matches
 * @return string The same fenced block with its body unslashed and HTML-escaped
 */
public function do_codeblock_preserve( $matches ) {
	// Unslash first, then entity-encode the body only; fences and language stay as-is.
	$escaped_body = esc_html( stripslashes( $matches[3] ) );

	return $matches[1] . $matches[2] . "\n" . $escaped_body . $matches[4];
}

/**
 * Undo codeblock_preserve(): decode the HTML-encoded interior of fenced code blocks.
 *
 * Both backtick (```) and tilde (~~~) fences are handled; each match is
 * handed to do_codeblock_restore().
 *
 * @param string $text Markdown/HTML content with escaped code blocks
 * @return string Markdown/HTML content
 */
public function codeblock_restore( $text ) {
	$unescape_callback = array( $this, 'do_codeblock_restore' );
	$fence_patterns    = array(
		"/^(`{3})([^`\n]+)?\n([^`~]+)(`{3})/m",
		"/^(~{3})([^~\n]+)?\n([^~~]+)(~{3})/m",
	);

	// Backtick fences first, then tilde fences, each pass feeding the next.
	foreach ( $fence_patterns as $pattern ) {
		$text = preg_replace_callback( $pattern, $unescape_callback, $text );
	}

	return $text;
}

/**
 * Regex callback for code block restoration (unescaping).
 *
 * Expects the capture groups produced by the fence patterns in codeblock_restore():
 * [1] opening fence, [2] optional language/class, [3] block body, [4] closing fence.
 *
 * @param array $matches Regex matches
 * @return string Codeblock with unescaped interior
 */
public function do_codeblock_restore( $matches ) {
	// ENT_QUOTES is required so that &#039; (single quote) is decoded as well as &quot;.
	// Before PHP 8.1 the default flag set is ENT_COMPAT, which leaves single quotes
	// encoded and breaks the round trip with do_codeblock_preserve().
	$block = html_entity_decode( $matches[3], ENT_QUOTES );
	$open = $matches[1] . $matches[2] . "\n";
	return $open . $block . $matches[4];
}

/**
* Called to preserve legacy LaTeX like $latex some-latex-text $
* @param string $text Text in which to preserve LaTeX
Expand Down Expand Up @@ -254,17 +297,23 @@ public function _doEscapeForHashWithoutSpacing( $m ) {
* Overload to support Viper's [code] shortcode. Because awesome.
*/
public function _doFencedCodeBlocks_callback( $matches ) {
	// Just MarkdownExtra_Parser if we're not going ultra-deluxe. A missing language
	// class no longer falls back to the parent; it is defaulted to "text" below.
	if ( ! $this->use_code_shortcode ) {
		return parent::_doFencedCodeBlocks_callback( $matches );
	}

	// Default to a "text" class if one wasn't passed. Helps with encoding issues later.
	if ( empty( $matches[2] ) ) {
		$matches[2] = 'text';
	}

	$classname = $matches[2];
	$codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

	// Drop a leading dot from the class name. Square-bracket offset is used because
	// the curly-brace form ($str{0}) is deprecated in PHP 7.4 and a parse error in PHP 8.
	if ( $classname[0] === '.' ) {
		$classname = substr( $classname, 1 );
	}

	// Escape the code, wrap it in the shortcode delimiters, and hash the result as a block.
	$codeblock = esc_html( $codeblock );
	$codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;
	return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
}
Expand Down
48 changes: 42 additions & 6 deletions modules/markdown/easy-markdown.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public function load_markdown_for_posts() {
add_action( 'wp_restore_post_revision', array( $this, 'wp_restore_post_revision' ), 10, 2 );
add_filter( '_wp_post_revision_fields', array( $this, '_wp_post_revision_fields' ) );
add_action( 'xmlrpc_call', array( $this, 'xmlrpc_actions' ) );
add_filter( 'content_save_pre', array( $this, 'preserve_code_blocks' ), 1 );
if ( defined( 'XMLRPC_REQUEST' ) && XMLRPC_REQUEST ) {
$this->check_for_mwgetpost();
}
Expand All @@ -116,6 +117,7 @@ public function unload_markdown_for_posts() {
remove_action( 'wp_restore_post_revision', array( $this, 'wp_restore_post_revision' ), 10, 2 );
remove_filter( '_wp_post_revision_fields', array( $this, '_wp_post_revision_fields' ) );
remove_action( 'xmlrpc_call', array( $this, 'xmlrpc_actions' ) );
remove_filter( 'content_save_pre', array( $this, 'preserve_code_blocks' ), 1 );
}

/**
Expand Down Expand Up @@ -194,6 +196,15 @@ public function o2_unescape_lists( $text ) {
return preg_replace( '/^[&]\#042; /um', '* ', $text );
}

/**
 * Escape fenced code block contents before KSES gets a chance to munge them.
 * Delegates to the parser's codeblock_preserve().
 *
 * @param string $text post content
 * @return string post content with code blocks escaped
 */
public function preserve_code_blocks( $text ) {
	$parser = $this->get_parser();

	return $parser->codeblock_preserve( $text );
}

/**
* Remove KSES if it's there. Store the result to manually invoke later if needed.
* @return null
Expand Down Expand Up @@ -348,8 +359,10 @@ protected function get_post_screen_post_type() {
public function edit_post_content( $content, $id ) {
	// Posts that are not Markdown are edited exactly as passed in.
	if ( ! $this->is_markdown( $id ) ) {
		return $content;
	}

	$post = get_post( $id );

	// When Markdown source is stored in post_content_filtered, serve that for editing.
	// swap_for_editing() also decodes preserved code blocks and leading blockquote
	// markers, so its result is returned directly; no separate assignment to
	// $content is needed.
	if ( $post && ! empty( $post->post_content_filtered ) ) {
		$post = $this->swap_for_editing( $post );
		return $post->post_content;
	}

	// No stored Markdown source: fall back to the content as passed in.
	return $content;
}
Expand Down Expand Up @@ -462,12 +475,16 @@ protected function comment_hash( $content ) {
* @param array $args Arguments, with keys:
* id: provide a string to prefix footnotes with a unique identifier
* unslash: when true, expects and returns slashed data
* decode_code_blocks: when true, assume that text in fenced code blocks is already
* HTML encoded and should be decoded before being passed to Markdown, which does
* its own encoding.
* @return string Markdown-processed content
*/
public function transform( $text, $args = array() ) {
$args = wp_parse_args( $args, array(
'id' => false,
'unslash' => true
'unslash' => true,
'decode_code_blocks' => ! $this->get_parser()->use_code_shortcode
) );
// probably need to unslash
if ( $args['unslash'] )
Expand All @@ -482,6 +499,10 @@ public function transform( $text, $args = array() ) {
$text = preg_replace( '/^&gt;/m', '>', $text );
// prefixes are because we need to namespace footnotes by post_id
$this->get_parser()->fn_id_prefix = $args['id'] ? $args['id'] . '-' : '';
// If we're not using the code shortcode, prevent over-encoding.
if ( $args['decode_code_blocks'] ) {
$text = $this->get_parser()->codeblock_restore( $text );
}
// Transform it!
$text = $this->get_parser()->transform( $text );
// Fix footnotes - kses doesn't like the : IDs it supplies
Expand Down Expand Up @@ -595,9 +616,7 @@ private function prime_post_cache( $post_id = false ) {
$post = get_post( $post_id );
if ( ! empty( $post->post_content_filtered ) ) {
wp_cache_delete( $post->ID, 'posts' );
$markdown = $post->post_content_filtered;
$post->post_content_filtered = $post->post_content;
$post->post_content = $markdown;
$post = $this->swap_for_editing( $post );
wp_cache_add( $post->ID, $post, 'posts' );
$this->posts_to_uncache[] = $post_id;
}
Expand All @@ -608,6 +627,23 @@ private function prime_post_cache( $post_id = false ) {
}
}

/**
 * Exchange `post_content` and `post_content_filtered` on a post so the
 * Markdown source is what gets edited.
 *
 * The Markdown taken from `post_content_filtered` is cleaned up on the way:
 * encoded code blocks are decoded and line-leading `&gt; ` is turned back into `> `.
 * The passed-in object is modified and returned.
 *
 * @param object $post WP_Post object
 * @return object WP_Post object with swapped `post_content_filtered` and `post_content`
 */
protected function swap_for_editing( $post ) {
	$original_content = $post->post_content;

	// Decode the code blocks that were entity-encoded on save.
	$source = $this->get_parser()->codeblock_restore( $post->post_content_filtered );
	// Bring back blockquote markers at the beginning of lines.
	$source = preg_replace( '/^&gt; /m', '> ', $source );

	$post->post_content          = $source;
	$post->post_content_filtered = $original_content;

	return $post;
}


/**
* We munge the post cache to serve proper markdown content to XML-RPC clients.
* Uncache these after the XML-RPC session ends.
Expand Down

0 comments on commit afd22d7

Please sign in to comment.