Willkommen bei WordPress. Dies ist dein erster Beitrag. Bearbeite oder lösche ihn und beginne mit dem Schreiben!
Hallo Welt!
von raredesign | Dez 3, 2019 | Allgemein | 0 Kommentare
Cokiee Shell
Current Path : /var/www/web28/html/wp-content/plugins/autodescription/inc/classes/helper/format/ |
Current File : //var/www/web28/html/wp-content/plugins/autodescription/inc/classes/helper/format/strings.class.php |
<?php
/**
 * @package The_SEO_Framework\Classes\Helper\Format\Strings
 * @subpackage The_SEO_Framework\Formatting
 */

namespace The_SEO_Framework\Helper\Format;

\defined( 'THE_SEO_FRAMEWORK_PRESENT' ) or die;

/**
 * The SEO Framework plugin
 * Copyright (C) 2023 - 2024 Sybre Waaijer, CyberWire B.V. (https://cyberwire.nl/)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Holds methods for String interpretation and conversion.
 * String is a reserved keyword, so we use Strings.
 *
 * @since 5.0.0
 *
 * @access protected
 *         Use tsf()->format()->strings() instead.
 */
class Strings {

	/**
	 * Shortens string and adds ellipses when over a threshold in length.
	 *
	 * The string is cut two characters short of the threshold before the
	 * hellip is appended, so a shortened result is never longer than `$over`.
	 *
	 * @since 3.1.0
	 * @since 4.2.0 No longer prepends a space before the hellip.
	 * @since 5.0.0 1. Now uses mb_* to determine the string length.
	 *              2. Moved from \The_SEO_Framework\Load.
	 *
	 * @param string $string The string to test and maybe trim
	 * @param int    $over   The character limit. Must be over 0 to have effect.
	 *                       If 1 or 2 is given and the string is longer than that,
	 *                       the returned string will only consist of the hellip.
	 * @return string The input when it fits (or when $over is 0 or lower); otherwise
	 *                the shortened input followed by a hellip.
	 */
	public static function hellip_if_over( $string, $over = 0 ) {

		if ( $over > 0 && \mb_strlen( $string ) > $over ) {
			// max() instead of abs(): a limit of 1 must keep zero characters, not one,
			// or the result ("x…") would exceed the requested limit.
			return mb_substr( $string, 0, max( 0, $over - 2 ) ) . '…';
		}

		return $string;
	}

	/**
	 * Counts words encounters from input string.
	 * Case insensitive. Returns first encounter of each word if found multiple times.
	 *
	 * Will only return words that are above set input thresholds.
	 *
	 * @since 2.7.0
	 * @since 3.1.0 This method now uses PHP 5.4+ encoding, capable of UTF-8 interpreting,
	 *              instead of relying on PHP's incomplete encoding table.
	 *              This does mean that the functionality is crippled when the PHP
	 *              installation isn't unicode compatible; this is unlikely.
	 * @since 4.0.0 1. Now expects PCRE UTF-8 encoding support.
	 *              2. Moved input-parameter altering filters outside of this function.
	 *              3. Short length now works as intended, instead of comparing as less, it compares as less or equal to.
	 * @since 4.2.0 Now supports detection of connector-dashes, connector-punctuation, and closing quotes,
	 *              and recognizes those as whole words.
	 * @since 5.0.0 1. Now converts input string as UTF-8. This mainly solves issues with attached quotes (d'anglais).
	 *              2. Moved from \The_SEO_Framework\Load.
	 *
	 * @param string $string Required. The string to count words in.
	 * @param array  $args   {
	 *     Optional. The word counting arguments.
	 *
	 *     @type int $filter_under       Consider word duplication under this number not a duplicate, default 3.
	 *     @type int $filter_short_under Consider short word duplication under this number not a duplicate, default 5.
	 *     @type int $short_word_length  Consider words under this character length "short", default 3.
	 * }
	 * @return array Containing arrays of words with their count.
	 *               Each entry has the shape `[ first-encountered spelling => count ]`.
	 */
	public static function get_word_count( $string, $args = [] ) {

		// Why not blog_charset? Because blog_charset is there only to onboard non-UTF-8 to UTF-8.
		$string = \wp_check_invalid_utf8( html_entity_decode( $string, \ENT_QUOTES, 'UTF-8' ) );

		if ( empty( $string ) )
			return [];

		// Array union: caller-provided keys win, missing keys get these defaults.
		$args += [
			'filter_under'       => 3,
			'filter_short_under' => 5,
			'short_word_length'  => 3,
		];

		// Don't use polyfills; we're going for speed over accuracy. Hosts must do their job correctly.
		$use_mb = \extension_loaded( 'mbstring' );

		// Split on every run of characters that cannot be part of a word.
		$word_list = preg_split(
			'/[^\p{Cc}\p{L}\p{N}\p{Pc}\p{Pd}\p{Pf}\'"]+/mu',
			$use_mb ? mb_strtolower( $string ) : strtolower( $string ),
			-1,
			\PREG_SPLIT_OFFSET_CAPTURE | \PREG_SPLIT_NO_EMPTY,
		);

		if ( empty( $word_list ) )
			return [];

		// 0 = word, 1 = offset. So we get [ offset => word ].
		// We want the offset because we relay how the word is first spelled.
		$words = array_column( $word_list, 0, 1 );

		// Flipping the reversed list lets the earliest offset overwrite later ones: [ word => first offset ].
		$word_offsets = array_flip( array_reverse( $words, true ) );

		$min_count = min( $args['filter_under'], $args['filter_short_under'] );

		$words_too_many = [];

		foreach ( array_count_values( $words ) as $word => $count ) {
			// Skip strlen if the word is counted fewer than any requirements.
			if ( $count < $min_count )
				continue;

			if ( ( $use_mb ? mb_strlen( $word ) : \strlen( $word ) ) <= $args['short_word_length'] ) {
				if ( $count < $args['filter_short_under'] )
					continue;
			} else {
				if ( $count < $args['filter_under'] )
					continue;
			}

			// !! Don't use mb_* here. preg_split's offset is in bytes, NOT multibytes.
			// NOTE(review): the offsets were measured on the lowercased copy but are applied to
			// the original string; this assumes lowercasing never changes a character's byte length.
			$first_encountered_word = substr( $string, $word_offsets[ $word ], \strlen( $word ) );

			$words_too_many[] = [ $first_encountered_word => $count ];
		}

		return $words_too_many;
	}

	/**
	 * Trims a sentence by word and determines sentence stops.
	 *
	 * Warning: Returns with entities encoded. The output is not safe for printing.
	 *
	 * @since 2.6.0
	 * @since 3.1.0 1. Now uses smarter trimming.
	 *              2. Deprecated 2nd parameter.
	 *              3. Now has unicode support for sentence closing.
	 *              4. Now strips last three words when preceded by a sentence closing separator.
	 *              5. Now always leads with (inviting) dots, even if the sentence is shorter than $max_char_length.
	 * @since 4.0.0 1. Now stops parsing earlier on failure.
	 *              2. Now performs faster queries.
	 *              3. Now maintains last sentence with closing punctuations.
	 * @since 4.0.5 1. Now decodes the sentence input, improving accuracy, and so that HTML entities at
	 *                 the end won't be transformed into gibberish.
	 * @since 4.1.0 1. Now texturizes the sentence input, improving accuracy with included closing & final punctuation support.
	 *              2. Now performs even faster queries, in most situations. (0.2ms/0.02ms total (worst/best) @ PHP 7.3/PCRE 11).
	 *                 Mind you, this method probably boots PCRE and wptexturize; so, it'll be slower than what we noted--it's
	 *                 overhead that otherwise WP, the theme, or other plugin would cause anyway. So, deduct that.
	 *              3. Now recognizes connector and final punctuations for preliminary sentence bounding.
	 *              4. Leading punctuation now excludes symbols, special annotations, opening brackets and quotes,
	 *                 and marks used in some latin languages like ¡¿.
	 *              5. Is now able to always strip leading punctuation.
	 *              6. It will now strip leading colon characters.
	 *              7. It will now stop counting trailing words towards new sentences when a connector, dash, mark, or ¡¿ is found.
	 *              8. Now returns encoded entities once more. So that the return value can be treated the same as anything else
	 *                 revolving around descriptions--preventing double transcoding like `&amp; > & > &` instead of `&`.
	 * @since 4.1.5 1. The second parameter now accepts values again. From "current description length" to minimum accepted char length.
	 *              2. Can now return an empty string when the input string doesn't satisfy the minimum character length.
	 *              3. The third parameter now defaults to 4096, so no longer unexpected results are created.
	 *              4. Resolved some backtracking issues.
	 *              5. Resolved an issue where a character followed by punctuation would cause the match to fail.
	 * @since 4.2.0 Now enforces at least a character length of 1. This prevents needless processing.
	 * @since 4.2.7 Now considers floating numerics as one word.
	 * @since 5.0.0 1. Moved from \The_SEO_Framework\Load.
	 *              2. Renamed from `trim_excerpt()`.
	 *              3. Anchored the first regex to the start to prevent catastrophic backtracking when no spacing is found.
	 *              4. Forced a useful match in the first regex to prevent catastrophic backtracking in the second regex.
	 * @since 5.0.3 In the first regex, the last word of a sentence shorter than maximum length without leading punctuation is now considered.
	 * @see https://secure.php.net/manual/en/regexp.reference.unicode.php
	 *
	 * We use `[^\P{Po}\'\":]` because WordPress texturizes ' and " to fall under `\P{Po}`.
	 * This is perfect. Please have the courtesy to credit us when taking it. :)
	 *
	 * @param string $sentence        The untrimmed sentence. Expected not to contain any HTML operators.
	 * @param int    $min_char_length The minimum character length. Set to 0 to ignore the requirement.
	 *                                This is read as a SUGGESTION. Multibyte characters will create inaccuracies.
	 * @param int    $max_char_length At what point to shave off the sentence.
	 *                                Values above 65535 are treated as 65535, the highest repetition count PCRE accepts.
	 * @return string The trimmed sentence with encoded entities. Needs escaping prior printing.
	 */
	public static function clamp_sentence( $sentence, $min_char_length = 1, $max_char_length = 4096 ) {

		// At least 1.
		$min_char_length = max( 1, $min_char_length );

		// We should _actually_ use mb_strlen, but that's wasteful on resources for something benign.
		// We'll rectify that later, somewhat, where characters are transformed.
		// We could also use preg_match_all( '/./u' ); or count( preg_split( '/./u', $sentence, $min_char_length ) );
		// But, again, that'll eat CPU cycles.
		if ( \strlen( $sentence ) < $min_char_length )
			return '';

		// Decode to get a more accurate character length in Unicode.
		$sentence = trim( html_entity_decode( $sentence, \ENT_QUOTES, 'UTF-8' ) );

		// PCRE refuses repetition counts of 65536 and up; without this cap the pattern below
		// would fail to compile for large limits and the method would silently return ''.
		$max_char_length = min( (int) $max_char_length, 65535 );

		// Find all words until $max_char_length, and trim when the last word boundary or punctuation is found.
		// Tries to match "\x20" when the sentence contains no spaces, subsequently failing because trim() already removed that.
		// Uses $ to consider cut-off endings under $max_char_length
		preg_match(
			\sprintf(
				'/^.{0,%d}(?:[^\P{Po}\'\":]|[\p{Pc}\p{Pd}\p{Pf}\p{Z}]|\x20|$)/su',
				$max_char_length,
			),
			$sentence,
			$matches,
		);

		// No match (or a failed match) leaves $matches empty; treat that as an empty sentence.
		$sentence = trim( $matches[0] ?? '' );

		if ( \strlen( $sentence ) < $min_char_length )
			return '';

		// Texturize to recognize the sentence structure. Decode thereafter since we get HTML returned.
		$sentence = html_entity_decode(
			\wptexturize(
				htmlentities(
					$sentence,
					\ENT_QUOTES,
					'UTF-8',
				)
			),
			\ENT_QUOTES,
			'UTF-8',
		);

		/**
		 * Play with it here:
		 * https://regex101.com/r/u0DIgx/5/ (old)
		 * https://regex101.com/r/G92lUt/5 (old)
		 * https://regex101.com/r/dAqhWC/1 (current)
		 *
		 * TODO Group 4's match is repeated. However, referring to it as (4) will cause it to congeal into 3.
		 * TODO `([\p{Z}\w])` will try to match any word boundary even if there aren't any. This must be detected above.
		 *      e.g., a sentence consisting ONLY of `''''` will cause catastrophic backtracking.
		 * Note: Group 4 misses `?\p{Z}*` between `.+` and `[\p{Pc}`, but I couldn't find a use-case for it.
		 *
		 * Note to self: Do not anchor to start of sentence.
		 * Critically optimized (worst case: 217 logic steps), so the $matches don't make much sense. Bear with me:
		 *
		 * @param array $matches : {
		 *    0 : Full sentence.
		 *    1 : Sentence after leading punctuation (if any), but including opening punctuation, marks, and ¡¿, before first punctuation (if any).
		 *    2 : First one character following [1], always some form of punctuation. Won't be set if [3] is set.
		 *    3 : Following [1] until last punctuation that isn't some sort of connecting punctuation that's leading a word-boundary.
		 *    4 : First three words leading [3]. Connecting punctuations that splits words are included as non-countable.
		 *    5 : All extraneous characters leading [3] and/or [4]. If this isn't set, forgo including 4--it won't be meaningful.
		 * }
		 */
		preg_match(
			'/(?:\A[\p{P}\p{Z}]*?)?([\P{Po}\p{M}\xBF\xA1:\'\p{Z}]+[\p{Z}\w])(?:([^\P{Po}\p{M}\xBF\xA1:]\Z(*ACCEPT))|((?(?=.+(?:\w+[\p{Pc}\p{Pd}\p{Pf}\p{Z}]*){1,3}|[\p{Po}]\Z)(?:[^\p{Pe}\p{Pf}]*+.*[\p{Pe}\p{Pf}]+\Z(*ACCEPT)|.*[^\P{Po}\p{M}\xBF\xA1:][^\P{Nd}\p{Z}]*)|.*\Z(*ACCEPT)))(?>(.+?\p{Z}*(?:\w+[\p{Pc}\p{Pd}\p{Pf}\p{Z}]*){1,3})|[^\p{Pc}\p{Pd}\p{M}\xBF\xA1:])?)(.+)?/su',
			$sentence,
			$matches,
		);

		// Unmatched isn't set. Since we count from last to first match, we don't need to test strlen().
		if ( isset( $matches[5] ) ) {
			$sentence = "$matches[1]$matches[3]$matches[4]$matches[5]";
			// Skip 4. It's useless content without 5.
		} elseif ( isset( $matches[3] ) ) {
			$sentence = "$matches[1]$matches[3]";
		} elseif ( isset( $matches[2] ) ) {
			// TODO Can we skip the next cleanup if we reach this?
			$sentence = "$matches[1]$matches[2]";
		} elseif ( isset( $matches[1] ) ) {
			$sentence = $matches[1];
		} else {
			// The sentence consists of control characters -- ditch it.
			return '';
		}

		if ( \strlen( $sentence ) < $min_char_length )
			return '';

		/**
		 * @param array $matches: {
		 *    1 : Full match until leading punctuation.
		 *    2 : Spaces before (if any) and including closing leading punctuation (if any).
		 *    3 : Non-closing leading punctuation and spaces (if any).
		 * }
		 */
		preg_match(
			'/(.+[^\p{Pc}\p{Pd}\p{M}\xBF\xA1:;,\p{Z}\p{Po}])+?(\p{Z}*?[^\p{Pc}\p{Pd}\p{M}\xBF\xA1:;,\p{Z}]+)?([\p{Pc}\p{Pd}\p{M}\xBF\xA1:;,\p{Z}]+)?/su',
			$sentence,
			$matches,
		);

		if ( isset( $matches[2] ) && \strlen( $matches[2] ) ) {
			$sentence = "$matches[1]$matches[2]";
		} elseif ( isset( $matches[1] ) && \strlen( $matches[1] ) ) {
			// Ignore useless [3], there's no [2], [1] is open-ended; so, add hellip.
			$sentence = "$matches[1]..."; // This should be texturized later to ….
		} else {
			// If there's no matches[1], only some form of non-closing-leading punctuation was left in $sentence. Empty it.
			$sentence = '';
		}

		if ( \strlen( $sentence ) < $min_char_length )
			return '';

		return trim( htmlentities( $sentence, \ENT_QUOTES, 'UTF-8' ) );
	}
}
Cokiee Shell Web 1.0, Coded By Razor
Neueste Kommentare