Module: Iudex::HTML::Filters::FactoryHelper
- Includes:
- Core, Tree, Tree::Filters
- Defined in:
- lib/iudex-html/factory_helper.rb
Instance Method Summary
-
#html_clean_filters(src_key, tree_key = nil) ⇒ Object
Create HTML parse and clean filters. Expected usage: PAGE: html_clean_filters( :source ); FEED: html_clean_filters( :title ), html_clean_filters( :summary ), html_clean_filters( :content ).
- #html_parse_filter(src_key, tree_key = nil) ⇒ Object
- #html_tree_filters ⇒ Object
-
#html_write_filter(key1, key2 = nil) ⇒ Object
Expected usage: FEED: html_write_filter( :summary ).
Methods included from Tree
Instance Method Details
#html_clean_filters(src_key, tree_key = nil) ⇒ Object
Create HTML parse and clean filters. Expected usage:
PAGE: html_clean_filters( :source )
FEED: html_clean_filters( :title )
FEED: html_clean_filters( :summary )
FEED: html_clean_filters( :content )
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/iudex-html/factory_helper.rb', line 35

# Build the pair of filters that parse HTML found at src_key and then
# clean the resulting tree.
#
# src_key::  key of the HTML source; converted with #to_k.
# tree_key:: key for the parsed tree; when omitted it is derived as
#            "<src_key>_tree" (as a Symbol) and then converted with #to_k.
#
# Returns an Array of two filters, in order: the parse filter from
# html_parse_filter, then an HTMLTreeFilter applying a TreeFilterChain
# of html_tree_filters to tree_key with Order::DEPTH_FIRST.
#
# Expected usage:
#   PAGE: html_clean_filters( :source )
#   FEED: html_clean_filters( :title )
#   FEED: html_clean_filters( :summary )
#   FEED: html_clean_filters( :content )
def html_clean_filters( src_key, tree_key = nil )
  # Derive the default tree key from the raw src_key before it is
  # converted by to_k.
  tree_key ||= "#{src_key}_tree".to_sym
  source = src_key.to_k
  tree   = tree_key.to_k

  parser = html_parse_filter( source, tree )

  #FIXME: PAGE: filters << TitleExtractor.new, or after?

  # FIXME: if src is text, last filter
  # filters << TextCtrlWSFilter.new( ContentKeys::TITLE )

  chain  = TreeFilterChain.new( html_tree_filters )
  walker = HTMLTreeFilter.new( tree, chain,
                               HTMLTreeFilter::Order::DEPTH_FIRST )

  #FIXME: First block extractor back to text key?

  [ parser, walker ]
end
#html_parse_filter(src_key, tree_key = nil) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/iudex-html/factory_helper.rb', line 69

# Create an HTMLParseFilter reading from src_key and writing to tree_key.
#
# src_key::  key of the HTML source; converted with #to_k.
# tree_key:: key for the parsed tree; when omitted it is derived as
#            "<src_key>_tree" (as a Symbol) and then converted with #to_k.
#
# Returns a new HTMLParseFilter. When the source key's value_type equals
# ContentSource.java_class, the three-argument constructor is used with
# nil as the middle argument; otherwise the two-argument constructor is
# used.
def html_parse_filter( src_key, tree_key = nil )
  # Derive the default tree key from the raw src_key before it is
  # converted by to_k.
  tree_key ||= "#{src_key}_tree".to_sym
  source = src_key.to_k
  tree   = tree_key.to_k

  unless source.value_type == ContentSource.java_class
    return HTMLParseFilter.new( source, tree )
  end

  # NOTE(review): the meaning of the nil middle argument is not visible
  # here -- confirm against the HTMLParseFilter constructors.
  HTMLParseFilter.new( source, nil, tree )
end
#html_tree_filters ⇒ Object
58 59 60 61 62 63 64 65 66 67 |
# File 'lib/iudex-html/factory_helper.rb', line 58

# Return a fresh Array of tree filter instances, one per class below,
# in the listed order. The trailing notes on each entry are the original
# author's ordering/traversal remarks.
def html_tree_filters
  filter_classes = [
    XmpToPreConverter,    # Before CharactersNormalizer
    CSSDisplayFilter,     # Before AttributeCleaner
    AttributeCleaner,
    MojiBakeCleaner,
    CharactersNormalizer,
    EmptyInlineRemover,   # Depth
    WordCounter,          # Depth; only for count deps?
    WordyCounter          # Depth; only with cleaners/simhash?
  ]

  # Instantiate each filter with no arguments, preserving order.
  filter_classes.map { |filter_class| filter_class.new }
end
#html_write_filter(key1, key2 = nil) ⇒ Object
Expected usage:
FEED: html_write_filter( :summary )
83 84 85 86 87 88 89 90 91 92 |
# File 'lib/iudex-html/factory_helper.rb', line 83

# Create an HTMLWriteFilter from a tree key and an output key.
#
# With two arguments, key1 is the tree key and key2 is the output key.
# With one argument, key1 is the output key and the tree key is derived
# as "<key1>_tree" (as a Symbol). Both keys are converted with #to_k
# before being passed to HTMLWriteFilter.new.
#
# Expected usage:
#   FEED: html_write_filter( :summary )
def html_write_filter( key1, key2 = nil )
  tree_key = key2 ? key1 : "#{key1}_tree".to_sym
  out_key  = key2 || key1

  HTMLWriteFilter.new( tree_key.to_k, out_key.to_k )
end