Class: RobotsTxtController

Inherits:
ApplicationController
Defined in:
app/controllers/robots_txt_controller.rb

Constant Summary

OVERRIDDEN_HEADER =
"# This robots.txt file has been customized at /admin/customize/robots\n"

Constants inherited from ApplicationController

ApplicationController::LEGACY_NO_THEMES, ApplicationController::LEGACY_NO_UNOFFICIAL_PLUGINS, ApplicationController::NO_PLUGINS, ApplicationController::NO_THEMES, ApplicationController::NO_UNOFFICIAL_PLUGINS, ApplicationController::SAFE_MODE

Constants included from CanonicalURL::ControllerExtensions

CanonicalURL::ControllerExtensions::ALLOWED_CANONICAL_PARAMS

Instance Attribute Summary

Attributes inherited from ApplicationController

#theme_id

Class Method Summary

Instance Method Summary

Methods inherited from ApplicationController

#application_layout, #can_cache_content?, #clear_notifications, #conditionally_allow_site_embedding, #current_homepage, #discourse_expires_in, #dont_cache_page, #ember_cli_required?, #fetch_user_from_params, #guardian, #handle_permalink, #handle_theme, #handle_unverified_request, #has_escaped_fragment?, #immutable_for, #no_cookies, #perform_refresh_session, #post_ids_including_replies, #preload_json, #rate_limit_second_factor!, #redirect_with_client_support, #render_json_dump, #render_serialized, requires_plugin, #rescue_discourse_actions, #resolve_safe_mode, #secure_session, #serialize_data, #set_current_user_for_logs, #set_layout, #set_mobile_view, #set_mp_snapshot_fields, #show_browser_update?, #store_preloaded, #use_crawler_layout?, #with_resolved_locale

Methods included from VaryHeader

#ensure_vary_header

Methods included from ReadOnlyMixin

#add_readonly_header, #allowed_in_staff_writes_only_mode?, #block_if_readonly_mode, #check_readonly_mode, included, #staff_writes_only_mode?

Methods included from Hijack

#hijack

Methods included from GlobalPath

#cdn_path, #cdn_relative_path, #full_cdn_url, #path, #upload_cdn_path

Methods included from JsonError

#create_errors_json

Methods included from CanonicalURL::ControllerExtensions

#canonical_url, #default_canonical, included

Methods included from CurrentUser

#clear_current_user, #current_user, has_auth_cookie?, #is_api?, #is_user_api?, #log_off_user, #log_on_user, lookup_from_env, #refresh_session

Class Method Details

.fetch_default_robots_info ⇒ Object



# File 'app/controllers/robots_txt_controller.rb', line 48

def self.fetch_default_robots_info
  # Googlebot is only denied the hard-disallowed paths; every other crawler
  # is additionally denied the paths that are blocked via response headers.
  deny_paths_googlebot = DISALLOWED_PATHS.map { |p| Discourse.base_path + p }
  deny_paths =
    deny_paths_googlebot + DISALLOWED_WITH_HEADER_PATHS.map { |p| Discourse.base_path + p }
  deny_all = ["#{Discourse.base_path}/"] # denies the entire site

  result = {
    header:
      "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
    agents: [],
  }

  if SiteSetting.allowed_crawler_user_agents.present?
    # Allowlist mode: each allowed agent gets the standard deny rules,
    # and every other agent is denied the entire site.
    SiteSetting
      .allowed_crawler_user_agents
      .split("|")
      .each do |agent|
        paths = agent == "Googlebot" ? deny_paths_googlebot : deny_paths
        result[:agents] << { name: agent, disallow: paths }
      end

    result[:agents] << { name: "*", disallow: deny_all }
  else
    # Blocklist mode: explicitly blocked agents are denied the entire site.
    if SiteSetting.blocked_crawler_user_agents.present?
      SiteSetting
        .blocked_crawler_user_agents
        .split("|")
        .each { |agent| result[:agents] << { name: agent, disallow: deny_all } }
    end

    result[:agents] << { name: "*", disallow: deny_paths }

    result[:agents] << { name: "Googlebot", disallow: deny_paths_googlebot }
  end

  # Let plugins adjust the rules before they are rendered.
  DiscourseEvent.trigger(:robots_info, result)

  result
end
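
Because this method triggers the :robots_info event before returning, a plugin can adjust the generated rules. Below is a minimal sketch of such a hook; the /my-plugin/ path is hypothetical and only illustrates appending an extra rule to every agent:

DiscourseEvent.on(:robots_info) do |robots_info|
  robots_info[:agents].each do |agent|
    # Each agent entry is { name: "...", disallow: [...] }; the path
    # appended here is hypothetical.
    agent[:disallow] << Discourse.base_path + "/my-plugin/"
  end
end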

Instance Method Details

#builder ⇒ Object

If you are hosting Discourse in a subfolder, you will need to create your robots.txt in the root of your web server with the appropriate paths. This method returns JSON that a script can use to generate a robots.txt that works well with your existing site; see the sketch after the listing below.



# File 'app/controllers/robots_txt_controller.rb', line 41

def builder
  result = self.class.fetch_default_robots_info
  overridden = SiteSetting.overridden_robots_txt
  result[:overridden] = overridden if overridden.present?
  render json: result
end
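
For a subfolder install, a consumer script along the following lines could turn the builder payload into a root-level robots.txt. This is a sketch, not part of Discourse: it assumes the action is reachable at /robots-builder.json, and the host and output path are placeholders:

require "json"
require "net/http"

# Fetch the generated rules; the host is a placeholder.
info = JSON.parse(Net::HTTP.get(URI("https://forum.example.com/robots-builder.json")))

lines = [info["header"], ""]
info["agents"].each do |agent|
  lines << "User-agent: #{agent["name"]}"
  agent["disallow"].each { |path| lines << "Disallow: #{path}" }
  lines << ""
end

# Write into the web server's document root; adjust for your setup.
File.write("/var/www/robots.txt", lines.join("\n"))

Note that when a custom robots.txt has been saved, the payload also carries an overridden key, which a script may prefer over the generated rules.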

#index ⇒ Object



# File 'app/controllers/robots_txt_controller.rb', line 23

def index
  if (overridden = SiteSetting.overridden_robots_txt.dup).present?
    # Show admins browsing the file directly (but not API clients) a note
    # that the file has been customized.
    overridden.prepend(OVERRIDDEN_HEADER) if guardian.is_admin? && !is_api?
    render plain: overridden
    return
  end
  if SiteSetting.allow_index_in_robots_txt?
    @robots_info = self.class.fetch_default_robots_info
    render :index, content_type: "text/plain"
  else
    render :no_index, content_type: "text/plain"
  end
end
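
The admin-only OVERRIDDEN_HEADER prepend is easiest to see with sample output. Assuming an override of "User-agent: *\nDisallow: /" has been saved at /admin/customize/robots, an admin viewing /robots.txt in the browser (not via the API) would receive:

# This robots.txt file has been customized at /admin/customize/robots
User-agent: *
Disallow: /

Anonymous visitors and API requests receive the override verbatim, without the comment line.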