Skip to content

Commit

Permalink
[not verified] Jetpack Sync: Checksums: Add support for User and User…
Browse files Browse the repository at this point in the history
… Meta tables. (#21139)

* Initial commit

* Change method visibility to allow easier extension on WPCOM.

* Add changelog entry

* Bump version

* Major rewrite to rely on `WP_User` objects. Changed the method visibilities to allow easier extending.

* Add `users` and `usermeta` to `checksum_all`

* Small refactoring to allow better WPCOM integration.

* Reorder the usermeta checksum to match the umeta_id used on WP.com. Update the meta values to use the expand_user logic from the Users module (used during sync) and also apply the sanitization used on WP.com. This allows us to checksum directly against the db value.

* Count the items distinctively. This is temporary and should be fixed in a more general sense.

* Update the checksum generation to align with WP.com logic summation of individual key-values

* not applicable comment

* Distinct Usermeta count query
empty check for meta.

* Update the usermeta mapping to reference the users table directly instead of usermeta

Co-authored-by: Matthew Denton <[email protected]>
  • Loading branch information
2 people authored and retrofox committed Oct 1, 2021
1 parent 92a8286 commit 04d23a5
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: added

Add support for checksumming user-related tables - wp_users and wp_usermeta
25 changes: 23 additions & 2 deletions projects/packages/sync/src/class-replicastore.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
namespace Automattic\Jetpack\Sync;

use Automattic\Jetpack\Sync\Replicastore\Table_Checksum;
use Automattic\Jetpack\Sync\Replicastore\Table_Checksum_Usermeta;
use Exception;
use WP_Error;

Expand Down Expand Up @@ -1311,7 +1312,7 @@ public function checksum_histogram( $table, $buckets = null, $start_id = null, $

$wpdb->queries = array();
try {
$checksum_table = new Table_Checksum( $table, $salt, $perform_text_conversion );
$checksum_table = $this->get_table_checksum_instance( $table, $salt, $perform_text_conversion );
} catch ( Exception $ex ) {
return new WP_Error( 'checksum_disabled', $ex->getMessage() );
}
Expand Down Expand Up @@ -1406,7 +1407,7 @@ private function invalid_call() {
private function calculate_buckets( $table, $start_id = null, $end_id = null ) {
// Get # of objects.
try {
$checksum_table = new Table_Checksum( $table );
$checksum_table = $this->get_table_checksum_instance( $table );
} catch ( Exception $ex ) {
return new WP_Error( 'checksum_disabled', $ex->getMessage() );
}
Expand All @@ -1429,4 +1430,24 @@ private function calculate_buckets( $table, $start_id = null, $end_id = null ) {

return (int) ceil( $object_count / $bucket_size );
}

/**
 * Build the checksum calculator that matches the requested table.
 *
 * The `usermeta` table is handled by the dedicated `Table_Checksum_Usermeta` class;
 * every other table name is handed to the generic `Table_Checksum`.
 *
 * @param string      $table                   The table that we want to get the instance for.
 * @param string|null $salt                    Salt to be used when generating the checksums.
 * @param bool        $perform_text_conversion Should we perform text encoding conversion when calculating the checksum.
 *
 * @return Table_Checksum|Table_Checksum_Usermeta
 * @throws Exception Might throw an exception if any of the input parameters were invalid.
 */
public function get_table_checksum_instance( $table, $salt = null, $perform_text_conversion = false ) {
	// Pick the implementation first, then instantiate it with the untouched arguments.
	$checksum_class = 'usermeta' === $table ? Table_Checksum_Usermeta::class : Table_Checksum::class;

	return new $checksum_class( $table, $salt, $perform_text_conversion );
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
<?php
/**
* Table Checksums Class.
*
* @package automattic/jetpack-sync
*/

namespace Automattic\Jetpack\Sync\Replicastore;

use Automattic\Jetpack\Sync;
use Automattic\Jetpack\Sync\Modules;
use WP_Error;
use WP_User_Query;

/**
 * Class to handle Table Checksums for the User Meta table.
 *
 * Rather than hashing database rows, this class loads the users in the requested
 * range, expands and sanitizes them, and checksums a fixed set of fields
 * (roles, capabilities, locale and allowed mime types) per user.
 */
class Table_Checksum_Usermeta extends Table_Checksum {
	/**
	 * Calculate the checksum based on provided range and filters.
	 *
	 * @param int|null   $range_from          The start of the range.
	 * @param int|null   $range_to            The end of the range.
	 * @param array|null $filter_values       Additional filter values. Not used at the moment.
	 * @param bool       $granular_result     If the returned result should be granular or only the checksum.
	 * @param bool       $simple_return_value If we want to use a simple return value for non-granular results (return only the checksum, without wrappers).
	 *
	 * @return array|int|WP_Error A `WP_Error` when checksums are disabled. Otherwise: the summed checksum
	 *                            (simple, non-granular), an array with `range` and `checksum` keys
	 *                            (non-simple, non-granular), or an array of per-user checksums keyed and
	 *                            sorted by user ID (granular).
	 */
	public function calculate_checksum( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false, $simple_return_value = true ) {

		if ( ! Sync\Settings::is_checksum_enabled() ) {
			return new WP_Error( 'checksum_disabled', 'Checksums are currently disabled.' );
		}

		/**
		 * First we need to fetch the user IDs for the users that we want to include in the range.
		 *
		 * To keep things simple and avoid filtering issues, we reuse the existing `build_filter_statement`
		 * to build the range condition.
		 */
		global $wpdb;

		// This call depends on the `range_field` pointing to the `ID` field of the `users` table. Currently, "ID".
		$range_filter_statement = $this->build_filter_statement( $range_from, $range_to );

		$query = "
			SELECT
				DISTINCT {$this->range_field}
			FROM
				{$this->table}
			WHERE
				{$range_filter_statement}
		";

		// phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
		$user_ids = $wpdb->get_col( $query );

		$checksum_entries = array();

		// Chunk the array down to make sure we don't overload the database with queries that are too large.
		foreach ( array_chunk( $user_ids, 500 ) as $user_ids_chunk ) {
			foreach ( $this->get_user_objects_by_ids( $user_ids_chunk ) as $user_object ) {
				// Expand and sanitize desired meta based on WP.com logic.
				$user_object = $this->expand_and_sanitize_user_meta( $user_object );

				$checksum_entries[ $user_object->ID ] = $this->calculate_user_checksum( $user_object );
			}
		}

		// Non-granular results need only to sum the different entries.
		if ( ! $granular_result ) {
			$checksum_sum = array_sum( $checksum_entries );

			if ( $simple_return_value ) {
				return $checksum_sum;
			}

			return array(
				'range'    => $range_from . '-' . $range_to,
				'checksum' => $checksum_sum,
			);
		}

		// Granular results: sort by user ID for easier comparisons and code flows further down the line.
		ksort( $checksum_entries );

		return $checksum_entries;
	}

	/**
	 * Sum the per-field checksums of a single (already expanded and sanitized) user.
	 *
	 * The array below maps the label mixed into each hash to the user property it is read from.
	 * Empty fields contribute nothing to the sum.
	 *
	 * @param mixed $user_object Expanded User Object.
	 *
	 * @return int Sum of the field checksums, 0 when every field is empty.
	 */
	protected function calculate_user_checksum( $user_object ) {
		$checksum_fields = array(
			'roles'              => 'roles',
			'capabilities'       => 'allcaps',
			'locale'             => 'locale',
			'allowed_mime_types' => 'allowed_mime_types',
		);

		$checksum = 0;
		foreach ( $checksum_fields as $checksum_key => $property ) {
			$checksum += $this->calculate_field_checksum( $user_object, $property, $checksum_key );
		}

		return $checksum;
	}

	/**
	 * Checksum a single user property as CRC32 of "salt#key#serialized value".
	 *
	 * @param mixed  $user_object  Expanded User Object.
	 * @param string $property     Name of the property to read from the user object.
	 * @param string $checksum_key Label mixed into the hash for this property.
	 *
	 * @return int The CRC32 value, or 0 when the property is empty.
	 */
	protected function calculate_field_checksum( $user_object, $property, $checksum_key ) {
		// `empty()` also covers properties that are not set at all, without raising a notice.
		if ( empty( $user_object->{$property} ) ) {
			return 0;
		}

		return crc32( implode( '#', array( $this->salt, $checksum_key, maybe_serialize( $user_object->{$property} ) ) ) );
	}

	/**
	 * Expand the User Object with additional meta sanitized by WP.com logic.
	 *
	 * @param mixed $user_object User Object from WP_User_Query.
	 *
	 * @return mixed $user_object expanded User Object.
	 */
	protected function expand_and_sanitize_user_meta( $user_object ) {
		$user_module = Modules::get_module( 'users' );
		// Expand User Objects based on Sync logic.
		$user_object = $user_module->expand_user( $user_object );

		// Sanitize locale.
		if ( ! empty( $user_object->locale ) ) {
			$user_object->locale = wp_strip_all_tags( $user_object->locale, true );
		}

		// Sanitize allcaps: every capability flag is normalized to a boolean.
		if ( ! empty( $user_object->allcaps ) ) {
			$user_object->allcaps = array_map(
				function ( $cap ) {
					return (bool) $cap;
				},
				$user_object->allcaps
			);
		}

		// Sanitize allowed_mime_types. Guarded like the other fields so a missing or non-array value is skipped
		// instead of triggering a PHP warning in the loop.
		if ( ! empty( $user_object->allowed_mime_types ) && is_array( $user_object->allowed_mime_types ) ) {
			foreach ( $user_object->allowed_mime_types as $allowed_mime_type_short => $allowed_mime_type_long ) {
				$allowed_mime_type_short = wp_strip_all_tags( (string) $allowed_mime_type_short, true );
				$allowed_mime_type_long  = wp_strip_all_tags( (string) $allowed_mime_type_long, true );
				// NOTE(review): when sanitizing changes the key, the original entry is kept alongside the
				// sanitized one. Left as-is because the checksum must match the remote value - confirm this is intended.
				$user_object->allowed_mime_types[ $allowed_mime_type_short ] = $allowed_mime_type_long;
			}
		}

		// Sanitize roles.
		if ( is_array( $user_object->roles ) ) {
			$user_object->roles = array_map( 'sanitize_text_field', $user_object->roles );
		}

		return $user_object;
	}

	/**
	 * Gets a list of `WP_User` objects by their IDs
	 *
	 * @param array $ids List of IDs to fetch.
	 *
	 * @return array The matching user objects, or an empty array when no IDs were given.
	 */
	protected function get_user_objects_by_ids( $ids ) {
		// An empty `include` list means "no restriction" to `WP_User_Query`, which would return every user.
		if ( empty( $ids ) ) {
			return array();
		}

		$user_query = new WP_User_Query(
			array(
				'include'     => $ids,
				// Only the results are used, so skip the extra total-count query.
				'count_total' => false,
			)
		);

		return $user_query->get_results();
	}
}
51 changes: 37 additions & 14 deletions projects/packages/sync/src/replicastore/class-table-checksum.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Table_Checksum {
*
* @var boolean
*/
private $perform_text_conversion = false;
protected $perform_text_conversion = false;

/**
* Field to be used for range queries.
Expand Down Expand Up @@ -106,28 +106,28 @@ class Table_Checksum {
*
* @var mixed|null
*/
private $parent_table = null;
protected $parent_table = null;

/**
* What field to use for the parent table join, if it has a "parent" table.
*
* @var mixed|null
*/
private $parent_join_field = null;
protected $parent_join_field = null;

/**
* What field to use for the table join, if it has a "parent" table.
*
* @var mixed|null
*/
private $table_join_field = null;
protected $table_join_field = null;

/**
* Some tables might not exist on the remote, and we want to verify they exist, before trying to query them.
*
* @var callable
*/
private $is_table_enabled_callback = false;
protected $is_table_enabled_callback = false;

/**
* Table_Checksum constructor.
Expand Down Expand Up @@ -177,7 +177,7 @@ public function __construct( $table, $salt = null, $perform_text_conversion = fa
*
* @return array
*/
private function get_default_tables() {
protected function get_default_tables() {
global $wpdb;

return array(
Expand Down Expand Up @@ -280,6 +280,29 @@ private function get_default_tables() {
'table_join_field' => 'order_item_id',
'is_table_enabled_callback' => array( $this, 'enable_woocommerce_tables' ),
),
'users' => array(
'table' => $wpdb->users,
'range_field' => 'ID',
'key_fields' => array( 'ID' ),
'checksum_text_fields' => array( 'user_login', 'user_nicename', 'user_email', 'user_url', 'user_registered', 'user_status', 'display_name' ),
'filter_values' => array(),
),

/**
* Usermeta is a special table, as it needs to use a custom override flow,
* as the user roles, capabilities, locale, mime types can be filtered by plugins.
* This prevents us from doing a direct comparison in the database.
*/
'usermeta' => array(
'table' => $wpdb->users,
/**
* Range field points to ID, which in this case is the `WP_User` ID,
* since we're querying the whole WP_User objects, instead of meta entries in the DB.
*/
'range_field' => 'ID',
'key_fields' => array(),
'checksum_fields' => array(),
),
);
}

Expand All @@ -288,7 +311,7 @@ private function get_default_tables() {
*
* @param array $table_configuration The table configuration array.
*/
private function prepare_fields( $table_configuration ) {
protected function prepare_fields( $table_configuration ) {
$this->key_fields = $table_configuration['key_fields'];
$this->range_field = $table_configuration['range_field'];
$this->checksum_fields = isset( $table_configuration['checksum_fields'] ) ? $table_configuration['checksum_fields'] : array();
Expand All @@ -309,7 +332,7 @@ private function prepare_fields( $table_configuration ) {
* @return mixed|string
* @throws Exception Throw an exception on validation failure.
*/
private function validate_table_name( $table ) {
protected function validate_table_name( $table ) {
if ( empty( $table ) ) {
throw new Exception( 'Invalid table name: empty' );
}
Expand All @@ -328,7 +351,7 @@ private function validate_table_name( $table ) {
*
* @throws Exception Throw an exception on failure to validate.
*/
private function validate_fields( $fields ) {
protected function validate_fields( $fields ) {
foreach ( $fields as $field ) {
if ( ! preg_match( '/^[0-9,a-z,A-Z$_]+$/i', $field ) ) {
throw new Exception( "Invalid field name: $field is not allowed" );
Expand All @@ -346,7 +369,7 @@ private function validate_fields( $fields ) {
* @return bool
* @throws Exception Throw an exception on failure to validate.
*/
private function validate_fields_against_table( $fields ) {
protected function validate_fields_against_table( $fields ) {
global $wpdb;

$valid_fields = array();
Expand All @@ -373,7 +396,7 @@ private function validate_fields_against_table( $fields ) {
*
* @throws Exception Throw an exception on failure to validate in the internal functions.
*/
private function validate_input() {
protected function validate_input() {
$fields = array_merge( array( $this->range_field ), $this->key_fields, $this->checksum_fields, $this->checksum_text_fields );

$this->validate_fields( $fields );
Expand All @@ -388,7 +411,7 @@ private function validate_input() {
*
* @return array|null
*/
private function prepare_filter_values_as_sql( $filter_values = array(), $table_prefix = '' ) {
protected function prepare_filter_values_as_sql( $filter_values = array(), $table_prefix = '' ) {
global $wpdb;

if ( ! is_array( $filter_values ) ) {
Expand Down Expand Up @@ -497,7 +520,7 @@ public function build_filter_statement( $range_from = null, $range_to = null, $f
*
* @throws Exception Throws an exception if validation fails in the internal function calls.
*/
private function build_checksum_query( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false ) {
protected function build_checksum_query( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false ) {
global $wpdb;

// Escape the salt.
Expand Down Expand Up @@ -773,7 +796,7 @@ public function calculate_checksum( $range_from = null, $range_to = null, $filte
*
* @return bool
*/
private function enable_woocommerce_tables() {
protected function enable_woocommerce_tables() {
/**
* On WordPress.com, we can't directly check if the site has support for WooCommerce.
* Having the option to override the functionality here helps with syncing WooCommerce tables.
Expand Down

0 comments on commit 04d23a5

Please sign in to comment.