Class: HtmlTokenizer::Parser
- Inherits: Object
- Inheritance: Object → HtmlTokenizer::Parser
- Defined in:
- ext/better_html_ext/parser.c
Instance Method Summary
- #append_placeholder(source) ⇒ Object
- #attribute_name ⇒ Object
- #attribute_quoted? ⇒ Boolean
- #attribute_value ⇒ Object
- #cdata_text ⇒ Object
- #closing_tag? ⇒ Boolean
- #column_number ⇒ Object
- #comment_text ⇒ Object
- #context ⇒ Object
- #document ⇒ Object
- #document_length ⇒ Object
- #errors ⇒ Object
- #errors_count ⇒ Object
- #initialize ⇒ Object constructor
- #line_number ⇒ Object
- #parse(source) ⇒ Object
- #quote_character ⇒ Object
- #rawtext_text ⇒ Object
- #self_closing_tag? ⇒ Boolean
- #tag_name ⇒ Object
Constructor Details
#initialize ⇒ Object
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 |
# File 'ext/better_html_ext/parser.c', line 468
/*
 * Parser#initialize
 *
 * Puts the parser struct wrapped by `self` into its starting state: the
 * whole struct is zero-filled, the context becomes PARSER_NONE, the embedded
 * tokenizer is initialised and pointed at parser_tokenize_callback (with the
 * parser itself as callback data), the document is empty at line 1,
 * column 0, and no errors are recorded.
 *
 * Returns nil.
 */
static VALUE parser_initialize_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  DBG_PRINT("parser=%p initialize", p);

  /* Zero everything first; the assignments below spell out each field. */
  memset(p, 0, sizeof(*p));
  p->context = PARSER_NONE;

  /* The tokenizer is initialised before the callback is attached to it. */
  tokenizer_init(&p->tk);
  p->tk.callback_data = p;
  p->tk.f_callback = parser_tokenize_callback;

  /* Empty document. */
  p->doc.data = NULL;
  p->doc.length = 0;
  p->doc.mb_length = 0;
  p->doc.enc_index = 0;

  /* Position counters: line starts at 1, column at 0. */
  p->doc.line_number = 1;
  p->doc.column_number = 0;

  /* Empty error list. */
  p->errors = NULL;
  p->errors_count = 0;

  return Qnil;
}
|
Instance Method Details
#append_placeholder(source) ⇒ Object
566 567 568 569 |
# File 'ext/better_html_ext/parser.c', line 566
/*
 * Parser#append_placeholder(source)
 *
 * Forwards `source` to parser_append_data() with 1 as the third argument
 * and returns its result.
 * NOTE(review): the third argument presumably marks the data as a
 * placeholder -- confirm against parser_append_data().
 */
static VALUE parser_append_placeholder_method(VALUE self, VALUE source)
{
  VALUE result = parser_append_data(self, source, 1);

  return result;
}
|
#attribute_name ⇒ Object
667 668 669 670 671 672 |
# File 'ext/better_html_ext/parser.c', line 667
/*
 * Parser#attribute_name
 *
 * Returns the result of ref_to_str() applied to the parser's
 * attribute.name reference.
 */
static VALUE parser_attribute_name_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE name;

  Parser_Get_Struct(self, p);

  name = ref_to_str(p, &p->attribute.name);
  return name;
}
|
#attribute_quoted? ⇒ Boolean
690 691 692 693 694 695 |
# File 'ext/better_html_ext/parser.c', line 690
/*
 * Parser#attribute_quoted?
 *
 * Returns true when the parser's attribute.is_quoted flag is set,
 * false otherwise.
 */
static VALUE parser_attribute_is_quoted_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if (p->attribute.is_quoted) {
    return Qtrue;
  }
  return Qfalse;
}
|
#attribute_value ⇒ Object
674 675 676 677 678 679 |
# File 'ext/better_html_ext/parser.c', line 674
/*
 * Parser#attribute_value
 *
 * Returns the result of ref_to_str() applied to the parser's
 * attribute.value reference.
 */
static VALUE parser_attribute_value_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE value;

  Parser_Get_Struct(self, p);

  value = ref_to_str(p, &p->attribute.value);
  return value;
}
|
#cdata_text ⇒ Object
704 705 706 707 708 709 |
# File 'ext/better_html_ext/parser.c', line 704
/*
 * Parser#cdata_text
 *
 * Returns the result of ref_to_str() applied to the parser's
 * cdata.text reference.
 */
static VALUE parser_cdata_text_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE text;

  Parser_Get_Struct(self, p);

  text = ref_to_str(p, &p->cdata.text);
  return text;
}
|
#closing_tag? ⇒ Boolean
653 654 655 656 657 658 |
# File 'ext/better_html_ext/parser.c', line 653
/*
 * Parser#closing_tag?
 *
 * Returns true when the tokenizer's is_closing_tag flag is set,
 * false otherwise.
 */
static VALUE parser_closing_tag_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if (p->tk.is_closing_tag) {
    return Qtrue;
  }
  return Qfalse;
}
|
#column_number ⇒ Object
762 763 764 765 766 767 |
# File 'ext/better_html_ext/parser.c', line 762
/*
 * Parser#column_number
 *
 * Returns the document's current column_number counter as a Ruby Integer.
 */
static VALUE parser_column_number_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE column;

  Parser_Get_Struct(self, p);

  column = ULONG2NUM(p->doc.column_number);
  return column;
}
|
#comment_text ⇒ Object
697 698 699 700 701 702 |
# File 'ext/better_html_ext/parser.c', line 697
/*
 * Parser#comment_text
 *
 * Returns the result of ref_to_str() applied to the parser's
 * comment.text reference.
 */
static VALUE parser_comment_text_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE text;

  Parser_Get_Struct(self, p);

  text = ref_to_str(p, &p->comment.text);
  return text;
}
|
#context ⇒ Object
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 |
# File 'ext/better_html_ext/parser.c', line 600
/*
 * Parser#context
 *
 * Maps the parser's current context to a Ruby Symbol:
 *
 *   PARSER_NONE                           -> :rawtext if rawtext_context()
 *                                            is truthy, otherwise :none
 *   PARSER_SOLIDUS_OR_TAG_NAME            -> :solidus_or_tag_name
 *   PARSER_TAG_NAME                       -> :tag_name
 *   PARSER_TAG                            -> :tag
 *   PARSER_ATTRIBUTE_NAME                 -> :attribute_name
 *   PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL  -> :after_attribute_name
 *   PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE  -> :after_equal
 *   PARSER_ATTRIBUTE_QUOTED_VALUE         -> :quoted_value
 *   PARSER_SPACE_AFTER_ATTRIBUTE          -> :space_after_attribute
 *   PARSER_ATTRIBUTE_UNQUOTED_VALUE       -> :unquoted_value
 *   PARSER_TAG_END                        -> :tag_end
 *   PARSER_COMMENT                        -> :comment
 *   PARSER_CDATA                          -> :cdata
 *
 * Returns nil for any context value not listed above.
 */
static VALUE parser_context_method(VALUE self)
{
  struct parser_t *p = NULL;
  const char *label = NULL;

  Parser_Get_Struct(self, p);

  /* Pick the symbol name; the conversion happens once, after the switch. */
  switch (p->context) {
  case PARSER_NONE:
    label = rawtext_context(p) ? "rawtext" : "none";
    break;
  case PARSER_SOLIDUS_OR_TAG_NAME:
    label = "solidus_or_tag_name";
    break;
  case PARSER_TAG_NAME:
    label = "tag_name";
    break;
  case PARSER_TAG:
    label = "tag";
    break;
  case PARSER_ATTRIBUTE_NAME:
    label = "attribute_name";
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL:
    label = "after_attribute_name";
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE:
    label = "after_equal";
    break;
  case PARSER_ATTRIBUTE_QUOTED_VALUE:
    label = "quoted_value";
    break;
  case PARSER_SPACE_AFTER_ATTRIBUTE:
    label = "space_after_attribute";
    break;
  case PARSER_ATTRIBUTE_UNQUOTED_VALUE:
    label = "unquoted_value";
    break;
  case PARSER_TAG_END:
    label = "tag_end";
    break;
  case PARSER_COMMENT:
    label = "comment";
    break;
  case PARSER_CDATA:
    label = "cdata";
    break;
  }

  /* No case matched: unknown context. */
  if (label == NULL) {
    return Qnil;
  }
  return ID2SYM(rb_intern(label));
}
|
#document ⇒ Object
571 572 573 574 575 576 577 578 579 580 |
# File 'ext/better_html_ext/parser.c', line 571
/*
 * Parser#document
 *
 * Returns a new Ruby String built from the parser's document buffer
 * (doc.length bytes starting at doc.data), tagged with the encoding looked
 * up from doc.enc_index. Returns nil when the parser has no document
 * buffer.
 */
static VALUE parser_document_method(VALUE self)
{
  struct parser_t *p = NULL;
  rb_encoding *encoding;

  Parser_Get_Struct(self, p);

  if (p->doc.data == NULL) {
    return Qnil;
  }

  encoding = rb_enc_from_index(p->doc.enc_index);
  return rb_enc_str_new(p->doc.data, p->doc.length, encoding);
}
|
#document_length ⇒ Object
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 |
# File 'ext/better_html_ext/parser.c', line 582
/*
 * Parser#document_length
 *
 * Returns the value rb_enc_strlen() reports for the document buffer
 * (doc.length bytes starting at doc.data) under the encoding looked up
 * from doc.enc_index. Returns 0 when the parser has no document buffer.
 */
static VALUE parser_document_length_method(VALUE self)
{
  struct parser_t *p = NULL;
  rb_encoding *encoding;
  const char *start;
  const char *end;

  Parser_Get_Struct(self, p);

  /* Nothing appended yet. */
  if (p->doc.data == NULL) {
    return ULONG2NUM(0);
  }

  start = p->doc.data;
  end = start + p->doc.length;
  encoding = rb_enc_from_index(p->doc.enc_index);

  return ULONG2NUM(rb_enc_strlen(start, end, encoding));
}
|
#errors ⇒ Object
738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 |
# File 'ext/better_html_ext/parser.c', line 738
/*
 * Parser#errors
 *
 * Returns a new Ruby Array holding one object, built by
 * create_parser_error(), for each of the parser's errors_count error slots
 * that has a message. Slots without a message are skipped, so the array
 * can be shorter than errors_count.
 */
static VALUE parser_errors_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE result;
  size_t idx;

  Parser_Get_Struct(self, p);

  result = rb_ary_new();

  for (idx = 0; idx < p->errors_count; idx++) {
    /* Skip slots that carry no message. */
    if (!p->errors[idx].message) {
      continue;
    }
    rb_ary_push(result, create_parser_error(&p->errors[idx]));
  }

  return result;
}
|
#errors_count ⇒ Object
718 719 720 721 722 723 |
# File 'ext/better_html_ext/parser.c', line 718
/*
 * Parser#errors_count
 *
 * Returns the parser's errors_count field as a Ruby Integer.
 *
 * The count is converted with SIZET2NUM rather than INT2NUM:
 * parser_errors_method iterates over it with a size_t index, and INT2NUM
 * would first narrow the value to int.
 */
static VALUE parser_errors_count_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);

  return SIZET2NUM(parser->errors_count);
}
|
#line_number ⇒ Object
755 756 757 758 759 760 |
# File 'ext/better_html_ext/parser.c', line 755
/*
 * Parser#line_number
 *
 * Returns the document's current line_number counter as a Ruby Integer.
 */
static VALUE parser_line_number_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE line;

  Parser_Get_Struct(self, p);

  line = ULONG2NUM(p->doc.line_number);
  return line;
}
|
#parse(source) ⇒ Object
561 562 563 564 |
# File 'ext/better_html_ext/parser.c', line 561
/*
 * Parser#parse(source)
 *
 * Forwards `source` to parser_append_data() with 0 as the third argument
 * and returns its result.
 * NOTE(review): the third argument presumably distinguishes regular input
 * from placeholder input -- confirm against parser_append_data().
 */
static VALUE parser_parse_method(VALUE self, VALUE source)
{
  VALUE result = parser_append_data(self, source, 0);

  return result;
}
|
#quote_character ⇒ Object
681 682 683 684 685 686 687 688 |
# File 'ext/better_html_ext/parser.c', line 681
/*
 * Parser#quote_character
 *
 * When the parser's attribute.is_quoted flag is set, returns a new
 * one-byte Ruby String copied from the tokenizer's attribute_value_start
 * field. Returns nil otherwise.
 */
static VALUE parser_quote_character_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if (!p->attribute.is_quoted) {
    return Qnil;
  }
  return rb_str_new(&p->tk.attribute_value_start, 1);
}
|
#rawtext_text ⇒ Object
711 712 713 714 715 716 |
# File 'ext/better_html_ext/parser.c', line 711
/*
 * Parser#rawtext_text
 *
 * Returns the result of ref_to_str() applied to the parser's
 * rawtext.text reference.
 */
static VALUE parser_rawtext_text_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE text;

  Parser_Get_Struct(self, p);

  text = ref_to_str(p, &p->rawtext.text);
  return text;
}
|
#self_closing_tag? ⇒ Boolean
660 661 662 663 664 665 |
# File 'ext/better_html_ext/parser.c', line 660
/*
 * Parser#self_closing_tag?
 *
 * Returns true when the parser's tag.self_closing flag is set,
 * false otherwise.
 */
static VALUE parser_self_closing_tag_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if (p->tag.self_closing) {
    return Qtrue;
  }
  return Qfalse;
}
|
#tag_name ⇒ Object
646 647 648 649 650 651 |
# File 'ext/better_html_ext/parser.c', line 646
/*
 * Parser#tag_name
 *
 * Returns the result of ref_to_str() applied to the parser's
 * tag.name reference.
 */
static VALUE parser_tag_name_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE name;

  Parser_Get_Struct(self, p);

  name = ref_to_str(p, &p->tag.name);
  return name;
}
|