Class: Nokolexbor::Document
- Defined in:
- lib/nokolexbor/document.rb,
ext/nokolexbor/nl_document.c
Constant Summary
Constants inherited from Node
Node::ATTRIBUTE_NODE, Node::CDATA_SECTION_NODE, Node::COMMENT_NODE, Node::DOCUMENT_FRAG_NODE, Node::DOCUMENT_NODE, Node::DOCUMENT_TYPE_NODE, Node::ELEMENT_NODE, Node::ENTITY_NODE, Node::ENTITY_REF_NODE, Node::LOOKS_LIKE_XPATH, Node::NOTATION_NODE, Node::PI_NODE, Node::TEXT_NODE
Class Method Summary collapse
-
.new ⇒ Document
Create a new document.
-
.parse(string_or_io) ⇒ Document
Parse HTML into a Document.
Instance Method Summary collapse
-
#create_cdata(string, &block) ⇒ CDATA
Create a CDATA containing `string`.
-
#create_comment(string, &block) ⇒ Comment
Create a Comment containing `string`.
-
#create_element(name, *contents_or_attrs, &block) ⇒ Element
Create an Element with `name` belonging to this document, optionally setting contents or attributes.
-
#create_text_node(string, &block) ⇒ Text
Create a Text with `string`.
-
#document ⇒ Document
A reference to `self`.
-
#meta_encoding ⇒ String
Get the meta tag encoding for this document.
-
#meta_encoding=(encoding) ⇒ Object
Set the meta tag encoding for this document.
-
#root ⇒ Node
Get the root node for this document.
- #set_metadata_element(element) ⇒ Object
-
#title ⇒ String
Get the title of this document.
-
#title=(text) ⇒ String
Set the title of this document.
Methods inherited from Node
#<<, #==, #[], #[]=, #add_child, #add_class, #add_next_sibling, #add_previous_sibling, #add_sibling, #after, #ancestors, #append_class, #at, #at_css, #at_css_impl, #at_xpath, #attribute, #attribute_nodes, #attributes, #attrs, #before, #cdata?, #child, #children, #children=, #classes, #clone, #comment?, #content, #content=, #css, #css_impl, #css_path, #destroy, #document?, #each, #element?, #element_children, #first_element_child, #fragment, #fragment?, #inner_html, #inspect, #key?, #keys, #kwattr_add, #kwattr_append, #kwattr_remove, #kwattr_values, #last_element_child, #matches?, #name, #next, #next_element, #node_type, #nokogiri_at_css, #nokogiri_css, #outer_html, #parent, #parent=, #parse, #path, #prepend_child, #previous, #previous_element, #processing_instruction?, #remove, #remove_attr, #remove_class, #replace, #search, #source_location, #swap, #text?, #traverse, #value?, #values, #wrap, #write_to, #xpath
Class Method Details
.new ⇒ Document
Create a new document.
84 85 86 87 88 |
# File 'ext/nokolexbor/nl_document.c', line 84
/*
 * Document.new: create a new document.
 *
 * Delegates to nl_document_parse with a zero-length Ruby string and returns
 * whatever that call returns.
 */
static VALUE
nl_document_new(VALUE self)
{
  VALUE rb_empty_html = rb_str_new("", 0);

  return nl_document_parse(self, rb_empty_html);
}
|
.parse(string_or_io) ⇒ Document
Parse HTML into a Nokolexbor::Document.
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'ext/nokolexbor/nl_document.c', line 40
/*
 * Document.parse(string_or_io): parse HTML into a new document.
 *
 * rb_string_or_io is either a string, or any object responding to #read, in
 * which case the result of calling #read with no arguments is parsed.
 *
 * When HAVE_PTHREAD_H is defined, the lexbor parser is looked up under
 * p_key_parser and stored there after first creation, so later calls reuse
 * it. Without pthreads there is nowhere to cache it, so a parser is created
 * for the call and destroyed again before returning.
 *
 * Raises through nl_raise_lexbor_error when the parser cannot be initialised
 * and RuntimeError when lexbor returns no document.
 */
static VALUE
nl_document_parse(VALUE self, VALUE rb_string_or_io)
{
  /* rb_intern yields an ID (a symbol handle), not a VALUE. */
  ID id_read = rb_intern("read");
  VALUE rb_html;
  if (rb_respond_to(rb_string_or_io, id_read)) {
    rb_html = rb_funcall(rb_string_or_io, id_read, 0);
  } else {
    rb_html = rb_string_or_io;
  }
  const char *html_c = StringValuePtr(rb_html);
  size_t html_len = RSTRING_LEN(rb_html);

#ifdef HAVE_PTHREAD_H
  lxb_html_parser_t *g_parser = (lxb_html_parser_t *)pthread_getspecific(p_key_parser);
#else
  lxb_html_parser_t *g_parser = NULL;
#endif
  if (g_parser == NULL) {
    g_parser = lxb_html_parser_create();
    lxb_status_t status = lxb_html_parser_init(g_parser);
    if (status != LXB_STATUS_OK) {
      /* The parser has not been cached yet; release it before raising so a
       * failed initialisation does not leak it. */
      lxb_html_parser_destroy(g_parser);
      nl_raise_lexbor_error(status);
    }
    g_parser->tree->scripting = true;
#ifdef HAVE_PTHREAD_H
    pthread_setspecific(p_key_parser, g_parser);
#endif
  }

  lxb_html_document_t *document = lxb_html_parse(g_parser, (const lxb_char_t *)html_c, html_len);
#ifndef HAVE_PTHREAD_H
  /* No cache in this build: the parser was created for this call only, so
   * free it here (before any raise below) instead of leaking one per call. */
  lxb_html_parser_destroy(g_parser);
#endif
  if (document == NULL) {
    rb_raise(rb_eRuntimeError, "Error parsing document");
  }
  return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
}
|
Instance Method Details
#create_cdata(string, &block) ⇒ CDATA
Create a CDATA containing `string`.
57 58 59 |
# File 'lib/nokolexbor/document.rb', line 57
#
# Create a CDATA node from +string+.
#
# +string+ is converted with #to_s; this document and the optional block are
# handed to Nokolexbor::CDATA.new unchanged.
def create_cdata(string, &block)
  cdata_text = string.to_s
  Nokolexbor::CDATA.new(cdata_text, self, &block)
end
#create_comment(string, &block) ⇒ Comment
Create a Comment containing `string`.
64 65 66 |
# File 'lib/nokolexbor/document.rb', line 64
#
# Create a Comment node from +string+.
#
# +string+ is converted with #to_s; this document and the optional block are
# handed to Nokolexbor::Comment.new unchanged.
def create_comment(string, &block)
  comment_text = string.to_s
  Nokolexbor::Comment.new(comment_text, self, &block)
end
#create_element(name, *contents_or_attrs, &block) ⇒ Element
Create an Element with `name` belonging to this document, optionally setting contents or attributes.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/nokolexbor/document.rb', line 32
#
# Create an Element called +name+ for this document.
#
# Each extra argument is applied in order: a Hash sets one attribute per
# pair (key and value both converted with #to_s); anything else replaces the
# element's content with its #to_s. The optional block is forwarded to
# Nokolexbor::Element.new. Returns the new element.
def create_element(name, *contents_or_attrs, &block)
  element = Nokolexbor::Element.new(name, self, &block)

  contents_or_attrs.each do |item|
    if Hash === item
      item.each { |key, value| element[key.to_s] = value.to_s }
    else
      element.content = item.to_s
    end
  end

  element
end
#create_text_node(string, &block) ⇒ Text
Create a Text with `string`.
50 51 52 |
# File 'lib/nokolexbor/document.rb', line 50
#
# Create a Text node from +string+.
#
# +string+ is converted with #to_s; this document and the optional block are
# handed to Nokolexbor::Text.new unchanged.
def create_text_node(string, &block)
  node_text = string.to_s
  Nokolexbor::Text.new(node_text, self, &block)
end
#document ⇒ Document
A reference to `self`.
71 72 73 |
# File 'lib/nokolexbor/document.rb', line 71
#
# Return this document itself.
def document
  self
end
#meta_encoding ⇒ String
Get the meta tag encoding for this document. If there is no meta tag, nil is returned.
78 79 80 81 82 83 84 |
# File 'lib/nokolexbor/document.rb', line 78
#
# Get the meta tag encoding for this document.
#
# Returns the charset attribute of the first <meta charset> element if there
# is one; otherwise the charset extracted from the "content" attribute of the
# element found by the fallback lookup; otherwise nil.
#
# NOTE(review): this listing had lost every identifier containing "meta".
# The method name and the local `meta` are restored from the surrounding
# headings; the fallback lookup is presumably the private helper
# `meta_content_type` -- confirm against lib/nokolexbor/document.rb.
def meta_encoding
  if (meta = at_css("meta[charset]"))
    meta[:charset]
  elsif (meta = meta_content_type)
    meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
  end
end
#meta_encoding=(encoding) ⇒ Object
Set the meta tag encoding for this document.
If a meta encoding tag is already present, its content is replaced with the given text.
Otherwise, this method tries to create one at an appropriate place, supplying head and/or html elements as necessary: inside a head element if there is one, and before any text node or content element (typically <body>) if there is one.
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/nokolexbor/document.rb', line 95
#
# Set the meta tag encoding for this document and return +encoding+.
#
# Order of preference:
# 1. an element found by the content-type lookup gets its "content"
#    attribute rewritten to "text/html; charset=<encoding>";
# 2. an existing <meta charset> element gets its "charset" attribute replaced;
# 3. otherwise a new <meta charset> node is created and placed at the start
#    of <head> when one exists, or handed to set_metadata_element.
#
# NOTE(review): this listing had lost every identifier containing "meta".
# The method name, the local `meta` and the set_metadata_element call are
# restored from the surrounding headings; the content-type lookup is
# presumably the private helper `meta_content_type` -- confirm against
# lib/nokolexbor/document.rb.
def meta_encoding=(encoding)
  if (meta = meta_content_type)
    meta["content"] = format("text/html; charset=%s", encoding)
    encoding
  elsif (meta = at_css("meta[charset]"))
    meta["charset"] = encoding
  else
    meta = Nokolexbor::Node.new("meta", self)
    meta["charset"] = encoding

    if (head = at_css("head"))
      head.prepend_child(meta)
    else
      set_metadata_element(meta)
    end
    encoding
  end
end
#root ⇒ Node
Get the root node for this document.
138 139 140 141 142 143 |
# File 'ext/nokolexbor/nl_document.c', line 138
/*
 * Document#root: get the root node for this document.
 *
 * Unwraps the native document from self, asks lexbor for its root via
 * lxb_dom_document_root, and wraps the result with nl_rb_node_create,
 * passing self along as the owning document object.
 */
static VALUE
nl_document_root(VALUE self)
{
  lxb_dom_document_t *dom_document = nl_rb_document_unwrap(self);

  return nl_rb_node_create(lxb_dom_document_root(dom_document), self);
}
|
#set_metadata_element(element) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/nokolexbor/document.rb', line 121
#
# Insert +element+ into the document's metadata section, creating the
# enclosing elements when they are missing:
# - with a <head>: append +element+ to it;
# - with an <html> but no <head>: prepend a new <head> to <html> and put
#   +element+ inside it;
# - with neither but at least one child node: insert +element+ before the
#   first such child;
# - with an empty document: add <html> and <head>, then put +element+ inside.
#
# The method name was missing from this listing and is restored from the
# heading above it.
def set_metadata_element(element)
  if (head = at_css("head"))
    head << element
  elsif (html = at_css("html"))
    head = html.prepend_child(Nokolexbor::Node.new("head", self))
    head.prepend_child(element)
  elsif (first = children.find do |node|
           case node
           when Nokolexbor::Node
             true
           end
         end)
    # We reach here only if the underlying document model
    # allows <html>/<head> elements to be omitted and does not
    # automatically supply them.
    first.add_previous_sibling(element)
  else
    html = add_child(Nokolexbor::Node.new("html", self))
    head = html.add_child(Nokolexbor::Node.new("head", self))
    head.prepend_child(element)
  end
end
#title ⇒ String
Get the title of this document.
103 104 105 106 107 108 109 |
# File 'ext/nokolexbor/nl_document.c', line 103
/*
 * Document#title: get the title of this document.
 *
 * Returns the text reported by lxb_html_document_title as a UTF-8 Ruby
 * string, or an empty UTF-8 string when lexbor reports no title. Both paths
 * use rb_utf8_str_new so the result's encoding does not depend on whether a
 * title exists.
 */
static VALUE
nl_document_get_title(VALUE self)
{
  size_t len = 0;
  const lxb_char_t *title = lxb_html_document_title(nl_rb_document_unwrap(self), &len);

  if (title == NULL) {
    return rb_utf8_str_new("", 0);
  }
  /* lxb_char_t is an unsigned byte type; rb_utf8_str_new wants const char *. */
  return rb_utf8_str_new((const char *)title, len);
}
|
#title=(text) ⇒ String
Set the title of this document.
If a title element is already present, its content is replaced with the given text.
Otherwise, this method tries to create one inside <head>.
124 125 126 127 128 129 130 131 |
# File 'ext/nokolexbor/nl_document.c', line 124
/*
 * Document#title=: set the title of this document.
 *
 * rb_title is coerced with StringValuePtr and its bytes are handed to
 * lxb_html_document_title_set. A non-OK status from lexbor is raised through
 * nl_raise_lexbor_error instead of being dropped, so a failed update is not
 * reported as success. Returns rb_title.
 */
static VALUE
nl_document_set_title(VALUE self, VALUE rb_title)
{
  const char *c_title = StringValuePtr(rb_title);
  size_t len = RSTRING_LEN(rb_title);

  lxb_status_t status = lxb_html_document_title_set(nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
  if (status != LXB_STATUS_OK) {
    nl_raise_lexbor_error(status);
  }
  return rb_title;
}
|