Class: RDF::Tabular::Dialect
- Defined in:
- lib/rdf/tabular/metadata.rb
Constant Summary collapse
- DEFAULTS =
Defaults for dialects
{ commentPrefix: false, delimiter: ",".freeze, doubleQuote: true, encoding: "utf-8".freeze, header: true, headerRowCount: 1, lineTerminators: :auto, quoteChar: '"'.freeze, skipBlankRows: false, skipColumns: 0, skipInitialSpace: false, skipRows: 0, trim: true }.freeze
- PROPERTIES =
{ :@id => :link, :@type => :atomic, commentPrefix: :atomic, delimiter: :atomic, doubleQuote: :atomic, encoding: :atomic, header: :atomic, headerRowCount: :atomic, lineTerminators: :atomic, quoteChar: :atomic, skipBlankRows: :atomic, skipColumns: :atomic, skipInitialSpace: :atomic, skipRows: :atomic, trim: :atomic, }.freeze
- REQUIRED =
[].freeze
Constants inherited from Metadata
Metadata::DATATYPES, Metadata::INHERITED_DEFAULTS, Metadata::INHERITED_PROPERTIES, Metadata::LOCAL_CONTEXT, Metadata::NAME_SYNTAX
Instance Attribute Summary
Attributes inherited from Metadata
#filenames, #id, #object, #parent, #url
Instance Method Summary collapse
-
#embedded_metadata(input, metadata, **options) ⇒ Metadata
Extract a new Metadata document from the file or data provided.
-
#escape_character ⇒ String
Escape character: `"` when doubleQuote is true, otherwise a backslash (`\`).
-
#headerRowCount ⇒ Integer
Number of header rows; when not set explicitly, defaults to 0 if header is false and to 1 otherwise.
-
#trim ⇒ Boolean, String
Trim setting; when not set explicitly, defaults to 'start' if skipInitialSpace is set and to true otherwise.
Methods inherited from Metadata
#==, #[], #[]=, #base, #common_properties, #context, #datatype=, #describes_file?, #dialect, #dialect=, #each, #each_row, for_input, #has_annotations?, #initialize, #inspect, new, #normalize!, #normalize_jsonld, open, site_wide_config, #tableSchema=, #tables=, #to_json, #transformations=, #type, #valid?, #valid_natural_language_property?, #validate, #validate!, #verify_compatible!
Constructor Details
This class inherits a constructor from RDF::Tabular::Metadata
Instance Method Details
#embedded_metadata(input, metadata, **options) ⇒ Metadata
Extract a new Metadata document from the file or data provided
1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 |
# File 'lib/rdf/tabular/metadata.rb', line 1792

##
# Extract a new Metadata document from the file or data provided.
#
# Column titles are gathered either from the `<th>` cells of the HTML table
# identified by the fragment of the table URL, or from the header rows of a
# CSV input. For CSV input, the rows skipped through `skipRows` are appended
# to `metadata["rdfs:comment"]`.
#
# @param [#read, String] input
#   an IO-like object, or a String location which is opened with
#   `RDF::Util::File.open_file` and then processed by this same method
# @param [#[], nil] metadata
#   receives the `lang` and `rdfs:comment` annotations; when nil, the newly
#   built table description is used instead
#   (presumably a Table instance -- confirm against callers)
# @param [Hash{Symbol => Object}] options
#   passed on to `RDF::Util::File.open_file` and `Table.new`; any `:context`
#   entry is discarded
# @option options [String] :base used as the `url` of the generated table
# @option options [String] :lang language used for column titles (default `und`)
# @return [Metadata] a Table built from the embedded metadata
# @raise [Error] if the input is HTML and no element matches the URL fragment
def embedded_metadata(input, metadata, **options)
  options = options.dup
  options.delete(:context) # Don't accidentally use a passed context

  # Normalize input to an IO object
  if input.is_a?(String)
    return ::RDF::Util::File.open_file(input) { |f|
      embedded_metadata(f, metadata, **options.merge(base: input.to_s))
    }
  end

  table = {
    "@context" => "http://www.w3.org/ns/csvw",
    "url" => options.fetch(:base, ""),
    "@type" => "Table",
    "tableSchema" => {
      "@type" => "Schema",
      "columns" => []
    }
  }
  metadata ||= table # In case the embedded metadata becomes the final metadata
  lang = metadata["lang"] = options[:lang] if options[:lang]
  lang ||= 'und'

  # Dialect settings do not change while extracting, so read them once.
  skip_cols = skipColumns.to_i
  trim_mode = trim.to_s
  trim_start = %w(true start).include?(trim_mode)
  trim_end = %w(true end).include?(trim_mode)

  # Applies the dialect's trim setting without mutating the caller's string.
  strip_value = lambda do |value|
    value = value.lstrip if trim_start
    value = value.rstrip if trim_end
    value
  end

  # Records one header cell as a title of the column at its (skip-adjusted) index.
  add_title = lambda do |value, index|
    # Skip leading columns and cells without content
    next if index < skip_cols || value.to_s.empty?

    columns = table["tableSchema"]["columns"] ||= []
    column = columns[index - skip_cols] ||= { "titles" => { lang => [] } }
    column["titles"][lang] << strip_value.call(value)
  end

  # The input may not expose a base URI at all; treat that as "no path".
  path = (input.base_uri.path rescue "").to_s
  if path.end_with?('.html') || (input.respond_to?(:content_type) && input.content_type == 'text/html')
    # Input is HTML; use fragment identifier to find table.
    fragment = RDF::URI(table["url"]).fragment rescue nil
    tab = begin
      # Extract with nokogiri. The constant itself must be tested:
      # `defined?(:Nokogiri)` checks a symbol literal and is always truthy,
      # which would skip the require even when Nokogiri is not loaded.
      require 'nokogiri' unless defined?(::Nokogiri)
      doc = Nokogiri::HTML.parse(input)
      doc.search("##{fragment}").first if fragment
    rescue LoadError
      # Extract with REXML
      # FIXME
    end
    raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab

    # Use rows with <th> to create column titles
    tab.xpath('.//tr').each do |row|
      row.xpath('th').map(&:content).each_with_index do |value, index|
        add_title.call(value, index)
      end
    end
  else
    # NOTE(review): the argument to CSV.new was lost in the extracted listing;
    # `csv_options` is presumed to be the inherited helper -- confirm.
    csv = ::CSV.new(input, **csv_options)

    # Skipped initial lines form comment annotations
    skipRows.to_i.times do
      # Array() guards against input that has fewer rows than skipRows
      value = strip_value.call(Array(csv.shift).join(delimiter))
      if commentPrefix && value.start_with?(commentPrefix)
        # Remove the whole prefix, which may be longer than one character
        value = value[commentPrefix.length..-1].strip
      end
      (metadata["rdfs:comment"] ||= []) << value unless value.empty?
    end
    # Log the comments actually collected above (table["notes"] is never set)
    log_debug("embedded_metadata") {"notes: #{metadata["rdfs:comment"].inspect}"}

    # to_i matches the coercion used for skipRows/skipColumns and keeps a nil
    # count from producing an endless range
    headerRowCount.to_i.times do
      Array(csv.shift).each_with_index do |value, index|
        add_title.call(value, index)
      end
    end
  end
  log_debug("embedded_metadata") {"table: #{table.inspect}"}
  input.rewind if input.respond_to?(:rewind)

  Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
end
#escape_character ⇒ String
Escape character: `"` when doubleQuote is true, otherwise a backslash (`\`).
1766 1767 1768 |
# File 'lib/rdf/tabular/metadata.rb', line 1766

# Escape character for this dialect.
#
# @return [String] `"` when doubleQuote is truthy, otherwise a backslash
def escape_character
  if self.doubleQuote
    '"'
  else
    '\\'
  end
end
#headerRowCount ⇒ Integer
Number of header rows; when not set explicitly, defaults to 0 if header is false and to 1 otherwise.
1772 1773 1774 |
# File 'lib/rdf/tabular/metadata.rb', line 1772

# Number of header rows.
#
# Returns the stored :headerRowCount when present; otherwise falls back to
# 1 if header is truthy and 0 if it is not.
#
# @return [Integer]
def headerRowCount
  fallback = self.header ? 1 : 0
  object.fetch(:headerRowCount, fallback)
end
#trim ⇒ Boolean, String
Trim setting; when not set explicitly, defaults to 'start' if skipInitialSpace is set and to true otherwise.
1778 1779 1780 |
# File 'lib/rdf/tabular/metadata.rb', line 1778

# Trim setting for this dialect.
#
# Returns the stored :trim when present; otherwise falls back to 'start'
# if skipInitialSpace is truthy and to true if it is not.
#
# @return [Boolean, String]
def trim
  fallback = self.skipInitialSpace ? 'start' : true
  object.fetch(:trim, fallback)
end