Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Markdown: preserve all data in code blocks #181

Merged
merged 1 commit into from
Feb 8, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 55 additions & 6 deletions _inc/lib/markdown/gfm.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
* @author Matt Wiebe <[email protected]>
* @link https://github.com/evansolomon/wp-github-flavored-markdown-comments
*
* Add a few extras from GitHub's Markdown implementation. Must be used
* in a WordPress environment if the $preserve_shortcodes member is set to true,
* which will be auto-detected initially on __construct()
* Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment.
*/

class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {
Expand Down Expand Up @@ -102,6 +100,51 @@ public function transform( $text ) {
return $text;
}

/**
 * Escape the interior of fenced code blocks so their contents survive KSES stripping.
 *
 * Two fence styles are handled, three backticks and three tildes, each optionally
 * followed by extra text (such as a language name) on the opening line.
 *
 * @param string $text Markdown/HTML content
 * @return string Markdown/HTML content with code block interiors HTML-encoded
 */
public function codeblock_preserve( $text ) {
	$fence_patterns = array(
		"/^(`{3})([^`\n]+)?\n([^`~]+)(`{3})/m",
		"/^(~{3})([^~\n]+)?\n([^~~]+)(~{3})/m",
	);
	$encoder = array( $this, 'do_codeblock_preserve' );

	// Backtick fences are processed first, then tilde fences, on the already-updated text.
	foreach ( $fence_patterns as $fence_pattern ) {
		$text = preg_replace_callback( $fence_pattern, $encoder, $text );
	}

	return $text;
}

/**
 * preg_replace_callback() handler used while preserving code blocks.
 *
 * @param array $matches Regex matches: [1] opening fence, [2] optional text following the
 *                       opening fence, [3] block interior, [4] closing fence
 * @return string Code block whose interior has been unslashed and HTML-escaped
 */
public function do_codeblock_preserve( $matches ) {
	// Only the interior is altered; the fences and the opening line are emitted as matched.
	$escaped_interior = esc_html( stripslashes( $matches[3] ) );

	return $matches[1] . $matches[2] . "\n" . $escaped_interior . $matches[4];
}

/**
 * Undo codeblock_preserve(): decode the interior of previously escaped fenced code blocks.
 *
 * The same two fence styles are recognised, three backticks and three tildes.
 *
 * @param string $text Markdown/HTML content with escaped code blocks
 * @return string Markdown/HTML content with code block interiors decoded
 */
public function codeblock_restore( $text ) {
	$fence_patterns = array(
		"/^(`{3})([^`\n]+)?\n([^`~]+)(`{3})/m",
		"/^(~{3})([^~\n]+)?\n([^~~]+)(~{3})/m",
	);
	$decoder = array( $this, 'do_codeblock_restore' );

	// Backtick fences are processed first, then tilde fences, on the already-updated text.
	foreach ( $fence_patterns as $fence_pattern ) {
		$text = preg_replace_callback( $fence_pattern, $decoder, $text );
	}

	return $text;
}

/**
 * Regex callback for code block restoration (unescaping).
 *
 * @param array $matches Regex matches: [1] opening fence, [2] optional text following the
 *                       opening fence, [3] escaped block interior, [4] closing fence
 * @return string Codeblock with unescaped interior
 */
public function do_codeblock_restore( $matches ) {
	// ENT_QUOTES is passed explicitly so that single quotes encoded as &#039; by the
	// preserve step (which uses esc_html()) are decoded as well. Before PHP 8.1 the
	// default flags of html_entity_decode() leave &#039; untouched.
	$block = html_entity_decode( $matches[3], ENT_QUOTES );
	$open  = $matches[1] . $matches[2] . "\n";

	return $open . $block . $matches[4];
}

/**
* Called to preserve legacy LaTeX like $latex some-latex-text $
* @param string $text Text in which to preserve LaTeX
Expand Down Expand Up @@ -254,17 +297,23 @@ public function _doEscapeForHashWithoutSpacing( $m ) {
* Overload to support Viper's [code] shortcode. Because awesome.
*/
public function _doFencedCodeBlocks_callback( $matches ) {
	// Without the code shortcode there is nothing extra to do: defer to MarkdownExtra_Parser.
	if ( ! $this->use_code_shortcode ) {
		return parent::_doFencedCodeBlocks_callback( $matches );
	}

	// Default to a "text" class if one wasn't passed. Helps with encoding issues later.
	$classname = empty( $matches[2] ) ? 'text' : $matches[2];

	// Drop a single leading dot from the class name. Square-bracket offsets are used
	// because the curly-brace form ($classname{0}) is no longer valid PHP as of 8.0.
	if ( '.' === $classname[0] ) {
		$classname = substr( $classname, 1 );
	}

	// Leading newlines of the block body are handed to _doFencedCodeBlocks_newlines().
	$codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

	// Escape the body before wrapping it in the shortcode.
	$codeblock = esc_html( $codeblock );
	$codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;

	return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
}
Expand Down
48 changes: 42 additions & 6 deletions modules/markdown/easy-markdown.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public function load_markdown_for_posts() {
add_action( 'wp_restore_post_revision', array( $this, 'wp_restore_post_revision' ), 10, 2 );
add_filter( '_wp_post_revision_fields', array( $this, '_wp_post_revision_fields' ) );
add_action( 'xmlrpc_call', array( $this, 'xmlrpc_actions' ) );
add_filter( 'content_save_pre', array( $this, 'preserve_code_blocks' ), 1 );
if ( defined( 'XMLRPC_REQUEST' ) && XMLRPC_REQUEST ) {
$this->check_for_mwgetpost();
}
Expand All @@ -116,6 +117,7 @@ public function unload_markdown_for_posts() {
remove_action( 'wp_restore_post_revision', array( $this, 'wp_restore_post_revision' ), 10, 2 );
remove_filter( '_wp_post_revision_fields', array( $this, '_wp_post_revision_fields' ) );
remove_action( 'xmlrpc_call', array( $this, 'xmlrpc_actions' ) );
remove_filter( 'content_save_pre', array( $this, 'preserve_code_blocks' ), 1 );
}

/**
Expand Down Expand Up @@ -194,6 +196,15 @@ public function o2_unescape_lists( $text ) {
return preg_replace( '/^[&]\#042; /um', '* ', $text );
}

/**
 * Escape code block contents before KSES runs, so that KSES does not munge them.
 *
 * @param string $text post content
 * @return string post content with code blocks escaped
 */
public function preserve_code_blocks( $text ) {
	$parser = $this->get_parser();

	return $parser->codeblock_preserve( $text );
}

/**
* Remove KSES if it's there. Store the result to manually invoke later if needed.
* @return null
Expand Down Expand Up @@ -348,8 +359,10 @@ protected function get_post_screen_post_type() {
public function edit_post_content( $content, $id ) {
	// Posts that are not Markdown are passed through untouched.
	if ( ! $this->is_markdown( $id ) ) {
		return $content;
	}

	$post = get_post( $id );
	if ( $post && ! empty( $post->post_content_filtered ) ) {
		// swap_for_editing() moves the stored post_content_filtered value into
		// post_content (after restoring code blocks and blockquote markers), so the
		// swapped post_content is what the editor should receive.
		$post = $this->swap_for_editing( $post );
		return $post->post_content;
	}

	// No stored filtered content: fall back to the content that was passed in.
	return $content;
}
Expand Down Expand Up @@ -462,12 +475,16 @@ protected function comment_hash( $content ) {
* @param array $args Arguments, with keys:
* id: provide a string to prefix footnotes with a unique identifier
* unslash: when true, expects and returns slashed data
* decode_code_blocks: when true, assume that text in fenced code blocks is already
* HTML encoded and should be decoded before being passed to Markdown, which does
* its own encoding.
* @return string Markdown-processed content
*/
public function transform( $text, $args = array() ) {
$args = wp_parse_args( $args, array(
'id' => false,
'unslash' => true
'unslash' => true,
'decode_code_blocks' => ! $this->get_parser()->use_code_shortcode
) );
// probably need to unslash
if ( $args['unslash'] )
Expand All @@ -482,6 +499,10 @@ public function transform( $text, $args = array() ) {
$text = preg_replace( '/^&gt;/m', '>', $text );
// prefixes are because we need to namespace footnotes by post_id
$this->get_parser()->fn_id_prefix = $args['id'] ? $args['id'] . '-' : '';
// If we're not using the code shortcode, prevent over-encoding.
if ( $args['decode_code_blocks'] ) {
$text = $this->get_parser()->codeblock_restore( $text );
}
// Transform it!
$text = $this->get_parser()->transform( $text );
// Fix footnotes - kses doesn't like the : IDs it supplies
Expand Down Expand Up @@ -595,9 +616,7 @@ private function prime_post_cache( $post_id = false ) {
$post = get_post( $post_id );
if ( ! empty( $post->post_content_filtered ) ) {
wp_cache_delete( $post->ID, 'posts' );
$markdown = $post->post_content_filtered;
$post->post_content_filtered = $post->post_content;
$post->post_content = $markdown;
$post = $this->swap_for_editing( $post );
wp_cache_add( $post->ID, $post, 'posts' );
$this->posts_to_uncache[] = $post_id;
}
Expand All @@ -608,6 +627,23 @@ private function prime_post_cache( $post_id = false ) {
}
}

/**
 * Exchange `post_content` and `post_content_filtered` on a post for editing purposes.
 *
 * Before it is moved into `post_content`, the `post_content_filtered` value has its
 * encoded code blocks decoded and its line-leading `&gt; ` sequences turned back into `> `.
 *
 * @param object $post WP_Post object
 * @return object The same WP_Post object with `post_content_filtered` and `post_content` swapped
 */
protected function swap_for_editing( $post ) {
	$previous_content = $post->post_content;

	// Decode encoded code blocks, then restore beginning-of-line blockquotes.
	$source = $this->get_parser()->codeblock_restore( $post->post_content_filtered );
	$source = preg_replace( '/^&gt; /m', '> ', $source );

	$post->post_content          = $source;
	$post->post_content_filtered = $previous_content;

	return $post;
}


/**
* We munge the post cache to serve proper markdown content to XML-RPC clients.
* Uncache these after the XML-RPC session ends.
Expand Down