Module: Twitter::Validation
Constant Summary collapse
- MAX_LENGTH =
140
- DEFAULT_TCO_URL_LENGTHS =
{ :short_url_length => 23, :short_url_length_https => 23, :characters_reserved_per_media => 23 }.freeze
- VALID_LIST_RE =
/\A#{Twitter::Regex[:valid_mention_or_list]}\z/o
Instance Method Summary collapse
-
#tweet_invalid?(text) ⇒ Boolean
Check the
text
for any reason that it may not be valid as a Tweet. -
#tweet_length(text, options = {}) ⇒ Object
Returns the length of the string as it would be displayed.
- #valid_hashtag?(hashtag) ⇒ Boolean
- #valid_list?(username_list) ⇒ Boolean
- #valid_tweet_text?(text) ⇒ Boolean
- #valid_url?(url, unicode_domains = true, require_protocol = true) ⇒ Boolean
- #valid_username?(username) ⇒ Boolean
Instance Method Details
#tweet_invalid?(text) ⇒ Boolean
Check the text
for any reason that it may not be valid as a Tweet. This is meant as a pre-validation before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation will allow quicker feedback.
Returns false
if this text
is valid. Otherwise one of the following Symbols will be returned:
<tt>:too_long</tt>:: if the <tt>text</tt> is too long
<tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
<tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/twitter-text/validation.rb', line 47
#
# Pre-validates +text+ as a Tweet before it is posted.
#
# Returns +false+ when no problem is found, otherwise one of these Symbols:
# <tt>:empty</tt>:: +text+ is nil or empty
# <tt>:too_long</tt>:: <tt>tweet_length(text)</tt> is greater than MAX_LENGTH
# <tt>:invalid_characters</tt>:: +text+ includes an entry of
#                                Twitter::Regex::INVALID_CHARACTERS, or an
#                                ArgumentError was raised while checking it
def tweet_invalid?(text)
  return :empty if !text || text.empty?

  begin
    return :too_long if tweet_length(text) > MAX_LENGTH
    return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any? { |invalid_char| text.include?(invalid_char) }
  rescue ArgumentError
    # non-Unicode value.
    return :invalid_characters
  end

  false
end
#tweet_length(text, options = {}) ⇒ Object
Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC (See: www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a string no matter which actual form was transmitted. For example:
U+0065 Latin Small Letter E
+ U+0301 Combining Acute Accent
2 bytes, 2 characters, displayed as é (1 visual glyph)
… The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/twitter-text/validation.rb', line 25
#
# Returns the length of +text+ as it would be counted for display.
#
# The base length is the number of code points in <tt>text.to_nfc</tt>.
# For every URL yielded by Twitter::Extractor.extract_urls_with_indices the
# URL's own span (end_position - start_position) is removed from the count
# and replaced by a fixed length taken from +options+.
#
# +options+ is merged over DEFAULT_TCO_URL_LENGTHS; the keys read here are
# <tt>:short_url_length</tt> and <tt>:short_url_length_https</tt>.
#
# NOTE(review): +to_nfc+ is not defined in this block; presumably a String
# extension supplied elsewhere in the library -- confirm.
def tweet_length(text, options = {})
  options = DEFAULT_TCO_URL_LENGTHS.merge(options)

  length = text.to_nfc.unpack("U*").length

  Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
    # Take out the characters the URL itself occupies ...
    length += start_position - end_position
    # ... and count the configured shortened length instead.
    length += url.downcase =~ /^https:\/\// ? options[:short_url_length_https] : options[:short_url_length]
  end

  length
end
#valid_hashtag?(hashtag) ⇒ Boolean
79 80 81 82 83 84 85 |
# File 'lib/twitter-text/validation.rb', line 79
#
# Returns true when +hashtag+ is exactly one hashtag and nothing else.
# nil and empty input return false.
#
# NOTE(review): the extractor method name was missing from this listing and
# has been restored as +extract_hashtags+ -- confirm against the library.
def valid_hashtag?(hashtag)
  return false if !hashtag || hashtag.empty?

  extracted = Twitter::Extractor.extract_hashtags(hashtag)
  # Should extract the hashtag minus the # sign, hence the [1..-1]
  extracted.size == 1 && extracted.first == hashtag[1..-1]
end
#valid_list?(username_list) ⇒ Boolean
73 74 75 76 77 |
# File 'lib/twitter-text/validation.rb', line 73
#
# Returns true when +username_list+ as a whole matches VALID_LIST_RE and
# carries a non-empty fourth capture group.
#
# nil and empty input return false, matching the other validators in this
# module, instead of raising NoMethodError on nil.
def valid_list?(username_list)
  return false if !username_list || username_list.empty?

  match = username_list.match(VALID_LIST_RE)
  # Must have matched and had nothing before or after.
  # Presumably group 1 is the text preceding the mention and group 4 the
  # list slug -- verify against Twitter::Regex[:valid_mention_or_list].
  !!(match && match[1] == "" && match[4] && !match[4].empty?)
end
#valid_tweet_text?(text) ⇒ Boolean
60 61 62 |
# File 'lib/twitter-text/validation.rb', line 60
#
# Boolean convenience wrapper: true exactly when tweet_invalid? reports no
# problem (i.e. returns false) for +text+.
def valid_tweet_text?(text)
  !tweet_invalid?(text)
end
#valid_url?(url, unicode_domains = true, require_protocol = true) ⇒ Boolean
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/twitter-text/validation.rb', line 87
#
# Returns true when +url+ as a whole matches
# Twitter::Regex[:validate_url_unencoded] and each of its parts passes its
# own check.
#
# +unicode_domains+:: when true the authority is checked against
#                     <tt>:validate_url_unicode_authority</tt>, otherwise
#                     against <tt>:validate_url_authority</tt>
# +require_protocol+:: when true the scheme must pass
#                      <tt>:validate_url_scheme</tt> and be http or https
#
# nil and empty input return false.
#
# NOTE(review): +valid_match?+ is defined outside this block; its third
# argument presumably marks the part as optional -- confirm.
def valid_url?(url, unicode_domains=true, require_protocol=true)
  return false if !url || url.empty?

  url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
  # The match has to span the entire input, not just a prefix or substring.
  return false unless (url_parts && url_parts.to_s == url)

  scheme, authority, path, query, fragment = url_parts.captures

  return false unless ((!require_protocol ||
                        (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
                       valid_match?(path, Twitter::Regex[:validate_url_path]) &&
                       valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
                       valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))

  return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) ||
         (!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority]))
end
#valid_username?(username) ⇒ Boolean
64 65 66 67 68 69 70 |
# File 'lib/twitter-text/validation.rb', line 64
#
# Returns true when +username+ is exactly one @-mention and nothing else.
# nil and empty input return false.
def valid_username?(username)
  return false if !username || username.empty?

  extracted = Twitter::Extractor.extract_mentioned_screen_names(username)
  # Should extract the username minus the @ sign, hence the [1..-1]
  extracted.size == 1 && extracted.first == username[1..-1]
end