File "RobotsTxt.php"
Full Path: /home/flipjqml/onlinebetsolution.com/wp-content/plugins/all-in-one-seo-pack/app/Common/Tools/RobotsTxt.php
File size: 17.44 KB
MIME-type: text/x-php
Charset: utf-8
<?php
namespace AIOSEO\Plugin\Common\Tools;
// Exit if accessed directly.
if ( ! defined( 'ABSPATH' ) ) {
exit;
}
use AIOSEO\Plugin\Common\Models;
class RobotsTxt {
/**
* Which directives are allowed to be extracted.
*
* @since 4.4.2
*
* @var array
*/
private $allowedDirectives = [ 'user-agent', 'allow', 'disallow', 'clean-param', 'crawl-delay' ];
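// Note: "sitemap" lines are not extracted as rules here; sitemap URLs are
// handled separately via extractSitemapUrls() below.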
/**
* Class constructor.
*
* @since 4.0.0
*/
public function __construct() {
add_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 );
if ( ! is_admin() || wp_doing_ajax() || wp_doing_cron() ) {
return;
}
add_action( 'init', [ $this, 'checkForPhysicalFiles' ] );
}
/**
* Build out the robots.txt rules.
*
* @since 4.0.0
*
* @param string $original The original rules to parse.
* @return string The parsed/appended/modified rules.
*/
public function buildRules( $original ) {
// Other plugins might call this too early.
if ( ! property_exists( aioseo(), 'sitemap' ) ) {
return $original;
}
$originalRules = $this->extractRules( $original );
$networkRules = [];
if ( is_multisite() ) {
$networkRules = aioseo()->networkOptions->tools->robots->enable ? aioseo()->networkOptions->tools->robots->rules : [];
}
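// When the site-level editor is enabled, network and site rules are merged
// first (network rules win on conflicts), and the combined set may override
// conflicting rules from the original output ($allowOverride = true below).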
if ( ! aioseo()->options->tools->robots->enable ) {
$ruleset = $this->mergeRules( $originalRules, $this->groupRulesByUserAgent( $networkRules ) );
} else {
$ruleset = $this->mergeRules(
$originalRules,
$this->mergeRules( $this->groupRulesByUserAgent( $networkRules ), $this->groupRulesByUserAgent( aioseo()->options->tools->robots->rules ) ),
true
);
}
/**
* Some plugins wrongly modify the robots.txt output by hooking into the
* `do_robots` action hook instead of the `robots_txt` filter hook. To make
* sure our output doesn't conflict with theirs in that case, a trailing
* new line is necessary.
*/
return $this->stringifyRuleset( $ruleset ) . "\n";
}
/**
* Merges two rulesets.
*
* @since 4.0.0
* @version 4.4.2
*
* @param array $rules1 An array of rules to merge with.
* @param array $rules2 An array of rules to merge.
* @param boolean $allowOverride Whether to allow overriding.
* @param boolean $allowDuplicates Whether to allow duplicates.
* @return array The validated rules.
*/
private function mergeRules( $rules1, $rules2, $allowOverride = false, $allowDuplicates = false ) {
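// Illustration: merging [ '*' => [ 'allow: /a' ] ] ($rules2) into
// [ '*' => [ 'disallow: /a' ] ] ($rules1) drops the incoming 'allow' rule,
// unless $allowOverride is true, in which case the original 'disallow'
// rule is removed instead.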
foreach ( $rules2 as $userAgent => $rules ) {
if ( empty( $userAgent ) ) {
continue;
}
if ( empty( $rules1[ $userAgent ] ) ) {
$rules1[ $userAgent ] = array_unique( $rules2[ $userAgent ] );
continue;
}
list( $rules1, $rules2 ) = $this->mergeRulesHelper( 'allow', $userAgent, $rules, $rules1, $rules2, $allowDuplicates, $allowOverride );
list( $rules1, $rules2 ) = $this->mergeRulesHelper( 'disallow', $userAgent, $rules, $rules1, $rules2, $allowDuplicates, $allowOverride );
$rules1[ $userAgent ] = array_unique( array_merge(
$rules1[ $userAgent ],
$rules2[ $userAgent ]
) );
}
return $rules1;
}
/**
* Helper function for {@see mergeRules()}.
*
* @since 4.1.2
* @version 4.4.2
*
* @param string $directive The directive (allow/disallow).
* @param string $userAgent The user agent.
* @param array $rules The rules.
* @param array $rules1 The original rules.
* @param array $rules2 The extra rules.
* @param bool $allowDuplicates Whether duplicates should be allowed.
* @param bool $allowOverride Whether the extra rules can override the original ones.
* @return array The original and extra rules.
*/
private function mergeRulesHelper( $directive, $userAgent, $rules, $rules1, $rules2, $allowDuplicates, $allowOverride ) {
$otherDirective = ( 'allow' === $directive ) ? 'disallow' : 'allow';
foreach ( $rules as $index1 => $rule ) {
list( , $ruleValue ) = $this->parseRule( $rule );
$index2 = array_search( "$otherDirective: $ruleValue", $rules1[ $userAgent ], true );
if ( false !== $index2 && ! $allowDuplicates ) {
if ( $allowOverride ) {
unset( $rules1[ $userAgent ][ $index2 ] );
} else {
unset( $rules2[ $userAgent ][ $index1 ] );
}
}
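// Convert robots.txt wildcards in the value to a regex, e.g. the value
// "/wp-*.php" becomes the pattern "/wp-(.*)\.php", so opposite-directive
// rules that match via wildcards are caught as well.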
$pattern = str_replace( [ '.', '*', '?', '$' ], [ '\.', '(.*)', '\?', '\$' ], $ruleValue );
foreach ( $rules1[ $userAgent ] as $rule1 ) {
$matches = [];
preg_match( "#^$otherDirective: $pattern$#", $rule1, $matches );
// Drop the incoming rule as soon as an existing opposite-directive rule matches it.
if ( ! empty( $matches ) && ! $allowDuplicates ) {
unset( $rules2[ $userAgent ][ $index1 ] );
break;
}
}
}
return [ $rules1, $rules2 ];
}
/**
* Parses a rule and extracts the directive and value.
*
* @since 4.4.2
*
* @param string $rule The rule to parse.
* @return array An array containing the parsed directive and value.
*/
private function parseRule( $rule ) {
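// e.g. 'Disallow: /wp-admin/' => [ 'Disallow', '/wp-admin/' ].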
list( $directive, $value ) = array_map( 'trim', array_pad( explode( ':', $rule, 2 ), 2, '' ) );
return [ $directive, $value ];
}
/**
* Stringifies the parsed rules.
*
* @since 4.0.0
* @version 4.4.2
*
* @param array $allRules The rules array.
* @return string The stringified rules.
*/
private function stringifyRuleset( $allRules ) {
$robots = [];
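// Output shape, one block per user agent, e.g.:
//
// User-agent: *
// Disallow: /wp-admin/
// Allow: /wp-admin/admin-ajax.php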
foreach ( $allRules as $userAgent => $rules ) {
if ( empty( $userAgent ) ) {
continue;
}
$robots[] = "\r\nUser-agent: $userAgent";
foreach ( $rules as $rule ) {
list( $directive, $value ) = $this->parseRule( $rule );
if ( empty( $directive ) || empty( $value ) ) {
continue;
}
$robots[] = sprintf( '%s: %s', ucfirst( $directive ), $value );
}
}
$robots = implode( "\r\n", $robots );
$sitemapUrls = $this->getSitemapRules();
if ( ! empty( $sitemapUrls ) ) {
$sitemapUrls = implode( "\r\n", $sitemapUrls );
$robots .= "\r\n\r\n$sitemapUrls";
}
return trim( $robots );
}
/**
* Get Sitemap URLs excluding the default ones.
*
* @since 4.1.7
*
* @return array An array of the Sitemap URLs.
*/
private function getSitemapRules() {
$defaultSitemaps = $this->extractSitemapUrls( aioseo()->robotsTxt->getDefaultRobotsTxtContent() );
$sitemapRules = aioseo()->sitemap->helpers->getSitemapUrlsPrefixed();
return array_diff( $sitemapRules, $defaultSitemaps );
}
/**
* Parses the rules.
*
* @since 4.0.0
* @version 4.4.2
*
* @param array $rules An array of rules.
* @return array The rules grouped by user agent.
*/
private function groupRulesByUserAgent( $rules ) {
$groups = [];
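// Each stored rule is a JSON string as written by prepareRobotsTxt(), e.g.
// {"userAgent":"*","directive":"disallow","fieldValue":"/wp-admin/"}.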
foreach ( $rules as $rule ) {
$r = json_decode( $rule, true );
if ( empty( $r['userAgent'] ) || empty( $r['fieldValue'] ) ) {
continue;
}
if ( empty( $groups[ $r['userAgent'] ] ) ) {
$groups[ $r['userAgent'] ] = [];
}
$groups[ $r['userAgent'] ][] = "{$r['directive']}: {$r['fieldValue']}";
}
return $groups;
}
/**
* Extract rules from a string.
*
* @since 4.0.0
* @version 4.4.2
*
* @param string $lines The lines to extract from.
* @return array An array of extracted rules.
*/
public function extractRules( $lines ) {
$lines = array_filter( array_map( 'trim', explode( "\n", (string) $lines ) ) );
$rules = [];
$userAgent = null;
$prevDirective = null;
$prevValue = null;
$siblingsUserAgents = [];
foreach ( $lines as $line ) {
list( $directive, $value ) = $this->parseRule( $line );
if ( empty( $directive ) || empty( $value ) ) {
continue;
}
$directive = strtolower( $directive );
if ( ! in_array( $directive, $this->allowedDirectives, true ) ) {
continue;
}
$value = $this->sanitizeDirectiveValue( $directive, $value );
if ( ! $value ) {
continue;
}
if ( 'user-agent' === $directive ) {
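// Consecutive "User-agent" lines form a single group that shares the
// rules which follow. Remember the earlier agents so each receives a
// copy of the group's rules once those rules are parsed below.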
if (
! empty( $prevDirective ) &&
! empty( $prevValue ) &&
'user-agent' === $prevDirective
) {
$siblingsUserAgents[] = $prevValue;
}
$userAgent = $value;
$rules[ $userAgent ] = [];
} else {
$rules[ $userAgent ][] = "$directive: $value";
if ( $siblingsUserAgents ) {
foreach ( $siblingsUserAgents as $siblingUserAgent ) {
$rules[ $siblingUserAgent ] = $rules[ $userAgent ];
}
$siblingsUserAgents = [];
}
}
$prevDirective = $directive;
$prevValue = $value;
}
return $rules;
}
/**
* Extract sitemap URLs from a string.
*
* @since 4.0.10
*
* @param string $lines The lines to extract from.
* @return array An array of sitemap URLs.
*/
public function extractSitemapUrls( $lines ) {
$lines = array_filter( array_map( 'trim', explode( "\n", (string) $lines ) ) );
$sitemapUrls = [];
foreach ( $lines as $line ) {
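// Match the "sitemap:" directive case-insensitively, but keep the
// original line (and its URL casing) in the result.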
$array = array_map( 'trim', explode( 'sitemap:', strtolower( $line ) ) );
if ( ! empty( $array[1] ) ) {
$sitemapUrls[] = trim( $line );
}
}
return $sitemapUrls;
}
/**
* Sanitize the robots.txt rule directive value.
*
* @since 4.0.0
* @version 4.4.2
*
* @param string $directive The directive.
* @param string $value The value.
* @return string The directive value.
*/
private function sanitizeDirectiveValue( $directive, $value ) {
// Percent-encoded characters are stripped from our option values, so we decode.
$value = rawurldecode( trim( $value ) );
if ( ! $value ) {
return $value;
}
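// Strip angle brackets; they aren't valid in paths or user agent tokens.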
$value = preg_replace( '/[><]/', '', $value );
if ( 'user-agent' === $directive ) {
$value = preg_replace( '/[^a-z0-9\-_*,.\s]/i', '', $value );
}
if ( 'allow' === $directive || 'disallow' === $directive ) {
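// Collapse repeated leading slashes, e.g. "//wp-admin/" becomes "/wp-admin/".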
$value = preg_replace( '/^\/+/', '/', $value );
}
return $value;
}
/**
* Check if a physical robots.txt file exists and, if it does, add a notice.
*
* @since 4.0.0
*
* @return void
*/
public function checkForPhysicalFiles() {
if ( ! $this->hasPhysicalRobotsTxt() ) {
return;
}
$notification = Models\Notification::getNotificationByName( 'robots-physical-file' );
if ( $notification->exists() ) {
return;
}
Models\Notification::addNotification( [
'slug' => uniqid(),
'notification_name' => 'robots-physical-file',
'title' => __( 'Physical Robots.txt File Detected', 'all-in-one-seo-pack' ),
'content' => sprintf(
// Translators: 1 - The plugin short name ("AIOSEO"), 2 - The plugin short name ("AIOSEO").
__( '%1$s has detected a physical robots.txt file in the root folder of your WordPress installation. We recommend removing this file as it could cause conflicts with WordPress\' dynamically generated one. %2$s can import this file and delete it, or you can simply delete it.', 'all-in-one-seo-pack' ), // phpcs:ignore Generic.Files.LineLength.MaxExceeded
AIOSEO_PLUGIN_SHORT_NAME,
AIOSEO_PLUGIN_SHORT_NAME
),
'type' => 'error',
'level' => [ 'all' ],
'button1_label' => __( 'Import and Delete', 'all-in-one-seo-pack' ),
'button1_action' => 'http://action#tools/import-robots-txt?redirect=aioseo-tools:robots-editor',
'button2_label' => __( 'Delete', 'all-in-one-seo-pack' ),
'button2_action' => 'http://action#tools/delete-robots-txt?redirect=aioseo-tools:robots-editor',
'start' => gmdate( 'Y-m-d H:i:s' )
] );
}
/**
* Import physical robots.txt file.
*
* @since 4.0.0
* @version 4.4.2
*
* @param bool $network True if inside WordPress network administration pages.
* @throws \Exception If the file is not readable or the import fails.
* @return boolean Whether or not the file imported correctly.
*/
public function importPhysicalRobotsTxt( $network = false ) {
try {
$fs = aioseo()->core->fs;
// Bail before touching the filesystem object if it isn't valid.
if ( ! $fs->isWpfsValid() ) {
throw new \Exception( esc_html__( 'There was an error importing the static robots.txt file.', 'all-in-one-seo-pack' ) );
}
$file = trailingslashit( $fs->fs->abspath() ) . 'robots.txt';
if ( ! $fs->isReadable( $file ) ) {
throw new \Exception( esc_html__( 'There was an error importing the static robots.txt file.', 'all-in-one-seo-pack' ) );
}
$lines = trim( (string) $fs->getContents( $file ) );
if ( $lines ) {
$this->importRobotsTxtFromText( $lines, $network );
}
return true;
} catch ( \Exception $e ) {
throw new \Exception( esc_html( $e->getMessage() ) );
}
}
/**
* Import robots.txt rules from text.
*
* @since 4.4.2
*
* @param string $text The text to import from.
* @param bool $network True if inside WordPress network administration pages.
* @throws \Exception If no User-agent is found.
* @return boolean Whether the file imported correctly or not.
*/
public function importRobotsTxtFromText( $text, $network ) {
$ruleset = $this->extractRules( $text );
if ( ! key( $ruleset ) ) {
throw new \Exception( esc_html__( 'No User-agent found at the beginning of the content.', 'all-in-one-seo-pack' ) );
}
$options = aioseo()->options;
if ( $network ) {
$options = aioseo()->networkOptions;
}
$currentRules = $this->groupRulesByUserAgent( $options->tools->robots->rules );
$ruleset = $this->mergeRules( $currentRules, $ruleset, false, true );
$options->tools->robots->rules = aioseo()->robotsTxt->prepareRobotsTxt( $ruleset );
return true;
}
/**
* Import robots.txt from a URL.
*
* @since 4.4.2
*
* @param string $url The URL to import from.
* @param bool $network True if inside WordPress network administration pages.
* @throws \Exception If request fails.
* @return bool Whether the import was successful or not.
*/
public function importRobotsTxtFromUrl( $url, $network ) {
$request = wp_remote_get( $url, [
'timeout' => 10,
'sslverify' => false
] );
$robotsTxtContent = wp_remote_retrieve_body( $request );
if ( ! $robotsTxtContent ) {
throw new \Exception( esc_html__( 'There was an error importing the robots.txt content from the URL.', 'all-in-one-seo-pack' ) );
}
$options = aioseo()->options;
if ( $network ) {
$options = aioseo()->networkOptions;
}
$newRules = $this->extractRules( $robotsTxtContent );
$currentRules = $this->groupRulesByUserAgent( $options->tools->robots->rules );
$newRules = $this->mergeRules( $currentRules, $newRules, false, true );
$options->tools->robots->rules = aioseo()->robotsTxt->prepareRobotsTxt( $newRules );
return true;
}
/**
* Deletes the physical robots.txt file.
*
* @since 4.4.5
*
* @throws \Exception If the file is not readable, or it can't be deleted.
* @return true True if the file was successfully deleted.
*/
public function deletePhysicalRobotsTxt() {
try {
$fs = aioseo()->core->fs;
if (
! $fs->isWpfsValid() ||
! $fs->fs->delete( trailingslashit( $fs->fs->abspath() ) . 'robots.txt' )
) {
throw new \Exception( __( 'There was an error deleting the physical robots.txt file.', 'all-in-one-seo-pack' ) );
}
Models\Notification::deleteNotificationByName( 'robots-physical-file' );
return true;
} catch ( \Exception $e ) {
throw new \Exception( esc_html( $e->getMessage() ) );
}
}
/**
* Prepare robots.txt rules to save.
*
* @since 4.1.4
*
* @param array $allRules Array with the rules.
* @return array The prepared rules array.
*/
public function prepareRobotsTxt( $allRules = [] ) {
$robots = [];
foreach ( $allRules as $userAgent => $rules ) {
if ( empty( $userAgent ) ) {
continue;
}
foreach ( $rules as $rule ) {
list( $directive, $value ) = $this->parseRule( $rule );
if ( empty( $directive ) || empty( $value ) ) {
continue;
}
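// Skip the default WordPress rules for the wildcard agent, since core
// outputs those itself ("Disallow: /wp-admin/" and
// "Allow: /wp-admin/admin-ajax.php").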
if (
'*' === $userAgent &&
(
'allow' === $directive && '/wp-admin/admin-ajax.php' === $value ||
'disallow' === $directive && '/wp-admin/' === $value
)
) {
continue;
}
$robots[] = wp_json_encode( [
'userAgent' => $userAgent,
'directive' => $directive,
'fieldValue' => $value
] );
}
}
return $robots;
}
/**
* Checks if a physical robots.txt file exists.
*
* @since 4.0.0
*
* @return boolean True if it does, false if not.
*/
public function hasPhysicalRobotsTxt() {
$fs = aioseo()->core->fs;
if ( ! $fs->isWpfsValid() ) {
return false;
}
$accessType = get_filesystem_method();
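// Only trust a direct filesystem check; with FTP/SSH-based access we
// can't reliably verify the file from here, so assume it doesn't exist.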
if ( 'direct' === $accessType ) {
$file = trailingslashit( $fs->fs->abspath() ) . 'robots.txt';
return $fs->exists( $file );
}
return false;
}
/**
* Get the default Robots.txt lines (excluding our own).
*
* @since 4.1.7
* @version 4.4.2
*
* @return string The robots.txt content rules (excluding our own).
*/
public function getDefaultRobotsTxtContent() {
// First, we need to remove our filter, so that it doesn't run unintentionally.
remove_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 );
ob_start();
do_robots();
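// do_robots() sends a text/plain Content-Type header; reset it in the
// admin so the surrounding admin page isn't served as plain text.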
if ( is_admin() ) {
header( 'Content-Type: text/html; charset=utf-8' );
}
$rules = strval( ob_get_clean() );
// Add the filter back.
add_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 );
return $rules;
}
/**
* A check to see if the rewrite rules are set.
* This isn't perfect, but it will help us know in most cases.
*
* @since 4.0.0
*
* @return boolean Whether the rewrite rules are set or not.
*/
public function rewriteRulesExist() {
// If a physical file exists, it's almost impossible to tell whether the rewrite rules are set.
// The only telltale sign is the request still returning an error status.
$response = wp_remote_get( aioseo()->helpers->getSiteUrl() . '/robots.txt' );
if ( 299 < wp_remote_retrieve_response_code( $response ) ) {
return false;
}
return true;
}
}