Module: DBX
- Defined in:
- lib/dbx.rb,
lib/dbx/differ.rb,
lib/dbx/model_base.rb
Overview
#rubocop:disable all
Defined Under Namespace
Modules: Differ Classes: ModelBase
Constant Summary collapse
- CONFIG_PATH =
'dbx.yml'
Class Method Summary collapse
-
.column_types(src, sample_rows: config_sample_rows, csv_options: {}) ⇒ Hash<String, Symbol>
Read the source as CSV and detect column types from the first `sample_rows` rows. The returned types match ActiveRecord column types.
- .config ⇒ Object
- .config_auto_index_pattern ⇒ Object
- .config_db ⇒ Object
- .config_from_yaml ⇒ Object
- .config_sample_rows ⇒ Object
- .connection(db_url: config_db, &block) ⇒ Object
- .create_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {}) ⇒ Object
-
.detect_type(value) ⇒ Object
Detect the column type given a value.
-
.import_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {}) ⇒ Object
TODO parse CSV options into Postgres.
- .index_table(table_name, pattern: nil) ⇒ Object
- .info(msg) ⇒ Object
- .parse_table_name(src) ⇒ Object
-
.tty ⇒ Object
TODO: what about Windows?
Class Method Details
.column_types(src, sample_rows: config_sample_rows, csv_options: {}) ⇒ Hash<String, Symbol>
Read the source as CSV and detect column types from the first `sample_rows` rows. The returned types match ActiveRecord column types. Types are cached in memory, keyed by `src`.
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/dbx.rb', line 123
# Read `src` as CSV and infer a column type for every header by sampling rows.
#
# The first row is treated as the header row (names are downcased). For each
# sampled data row every cell is passed to `detect_type`; a header matching
# one of the `column_patterns` from `config` takes that pattern's configured
# type instead. Once a column is `:string` it stays `:string`; `:decimal` is
# not narrowed back to `:integer`, nor `:datetime` to `:date`. Columns for
# which no type could be detected default to `:string`.
#
# Results are memoized per `src` in `@types`; later calls with the same `src`
# return the cached hash regardless of the other arguments.
#
# @param src [String] path of the CSV file
# @param sample_rows [Integer] maximum number of data rows to inspect
# @param csv_options [Hash] extra options forwarded to `CSV.foreach`
#   (`:headers` is always forced to `false`; the caller's hash is not modified)
# @return [Hash<String, Symbol>] header name => detected column type
def column_types(src, sample_rows: config_sample_rows, csv_options: {})
  @types ||= {}
  cached = @types[src]
  return cached if cached

  # Headers are read manually from the first row, so CSV's own header handling
  # is disabled. `merge` builds a copy instead of mutating the caller's hash.
  csv_options = csv_options.merge(headers: false)
  patterns = config['column_patterns'] || {}

  headers = nil
  types = []
  count = 0
  CSV.foreach(src, **csv_options) do |row|
    unless headers
      headers = row.map { |name| name.to_s.downcase }
      next
    end

    headers.each_with_index do |header, i|
      # :string is the widest type; nothing can narrow it again
      next if types[i] == :string

      pattern_type = patterns.detect { |pat, _| header =~ /#{pat}/ }
      if pattern_type
        types[i] = pattern_type.last
        next
      end

      type = detect_type(row[i])
      next if type.nil?

      keep_wider = (types[i] == :decimal && type == :integer) ||
                   (types[i] == :datetime && type == :date)
      types[i] = type unless keep_wider
    end

    # stop once exactly `sample_rows` data rows have been inspected
    break if (count += 1) >= sample_rows
  end

  # An empty file has no header row; every header without a detected type
  # (including trailing all-blank columns) falls back to :string.
  headers ||= []
  @types[src] = headers.each_with_index.map { |header, i| [header, types[i] || :string] }.to_h
end
.config ⇒ Object
21 22 23 |
# File 'lib/dbx.rb', line 21
# Memoized configuration: the result of `config_from_yaml` is stored in
# `@config` on first use and reused afterwards.
#
# @return [Object] whatever `config_from_yaml` returned
def config
  return @config if @config

  @config = config_from_yaml
end
.config_auto_index_pattern ⇒ Object
37 38 39 |
# File 'lib/dbx.rb', line 37
# Looks up the `auto_index_pattern` entry of the configuration.
#
# @return [Object, nil] the configured value, or nil when the key is absent
def config_auto_index_pattern
  settings = config
  settings['auto_index_pattern']
end
.config_db ⇒ Object
41 42 43 |
# File 'lib/dbx.rb', line 41
# Resolves the database URL: the `DATABASE_URL` environment variable wins,
# then the `db` entry of the configuration.
#
# @return [String] the database URL
# @raise [RuntimeError] when neither source provides a value
def config_db
  url = ENV['DATABASE_URL']
  url ||= config['db']
  return url if url

  raise('`db` not set as command line option or `dbx.yml`')
end
.config_from_yaml ⇒ Object
25 26 27 28 29 30 31 |
# File 'lib/dbx.rb', line 25
# Loads the configuration from the YAML file at `CONFIG_PATH`.
#
# A missing file yields an empty hash. An empty (or comment-only) file makes
# `YAML.load` return a falsy value, which is also mapped to an empty hash so
# that callers can always index the result.
#
# @return [Hash] parsed configuration, or `{}` when there is none
def config_from_yaml
  return {} unless File.file?(CONFIG_PATH)

  # NOTE(review): YAML.load may deserialize arbitrary objects on older Psych
  # versions; fine for a trusted local config file, but consider
  # YAML.safe_load if the file can come from an untrusted source.
  YAML.load(File.read(CONFIG_PATH)) || {}
end
.config_sample_rows ⇒ Object
33 34 35 |
# File 'lib/dbx.rb', line 33
# Number of rows to sample for type detection: the `sample_rows` entry of the
# configuration, or 100 when that entry is missing or falsy.
#
# @return [Object] the configured value, or 100
def config_sample_rows
  configured = config['sample_rows']
  return configured if configured

  100
end
.connection(db_url: config_db, &block) ⇒ Object
54 55 56 57 58 59 60 61 62 |
# File 'lib/dbx.rb', line 54
# Yields a connection checked out from `ModelBase.connection_pool`.
#
# On the first call the connection is established with `db_url` and a Logger
# writing to `tty` is installed on ModelBase. Note that `@pool` ends up
# holding that Logger (the value of the last assignment), so it only serves
# as an "already initialized" flag here.
#
# @param db_url [String] database URL used for the initial connection
# @yield [conn] the checked-out connection
# @return [Object] the result of `with_connection`
def connection(db_url: config_db, &block)
  unless @pool
    ModelBase.establish_connection(db_url)
    @pool = (ModelBase.logger = Logger.new(tty))
  end

  ModelBase.connection_pool.with_connection(&block)
end
.create_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {}) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/dbx.rb', line 68
# Creates a table whose columns mirror the detected column types of a CSV
# file. The table has no primary key column (`id: false`) and every column is
# nullable.
#
# @param src [String] path of the CSV file
# @param name [String, nil] table name; derived from `src` via
#   `parse_table_name` when nil
# @param force [Boolean] forwarded to `create_table` as `force:`
# @param sample_rows [Integer] forwarded to `column_types`
# @param csv_options [Hash] forwarded to `column_types`
# @return [Object] the result of the connection's `create_table`
def create_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {})
  name ||= parse_table_name(src)
  types = column_types(src, sample_rows: sample_rows, csv_options: csv_options)

  connection do |conn|
    conn.create_table(name, force: force, id: false) do |t|
      types.each do |column, type|
        # `null:` is the nullability option understood by column definitions
        t.public_send(type, column, null: true)
      end
    end
  end
end
.detect_type(value) ⇒ Object
Detect the column type given a value. May return nil if the value is blank.
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/dbx.rb', line 169
# Detect the column type for a single raw value.
#
# Checks run in order: blank, decimal, integer (at most 10 digits), UUID,
# date (`YYYY?MM?DD` with one repeated non-digit separator that `Date.parse`
# accepts), anything `Time.parse` accepts, and finally plain string.
# The patterns are anchored to the whole value (`\A`/`\z`), so a multi-line
# value is not classified by one of its lines alone.
#
# @param value [String, nil] raw cell value
# @return [Symbol, nil] :decimal, :integer, :uuid, :date, :datetime or
#   :string; nil when the value is blank
def detect_type(value)
  if value.blank?
    nil
  elsif value =~ /\A\d+\.\d+\z/
    :decimal
  elsif value =~ /\A\d{1,10}\z/
    :integer
  elsif value =~ /\A\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/
    :uuid
  # the inline rescues are deliberate probes: a parse failure just means
  # "not this type", so the next branch is tried
  elsif value =~ /\A\d{4}(\D)\d{2}\1\d{2}\z/ && (Date.parse(value) rescue false)
    :date
  elsif (Time.parse(value) rescue false)
    :datetime
  else
    :string
  end
end
.import_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {}) ⇒ Object
TODO parse CSV options into Postgres
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/dbx.rb', line 81
# Creates a table for a CSV file and bulk-loads the file into it with a
# Postgres `COPY ... FROM STDIN CSV` statement. When `auto_index_pattern` is
# configured, matching columns are indexed afterwards via `index_table`.
#
# TODO: parse CSV options into Postgres (the file is streamed to COPY as-is;
# `csv_options` only affects type detection).
#
# @param src [String] path of the CSV file
# @param name [String, nil] table name; derived from `src` via
#   `parse_table_name` when nil
# @param force [Boolean] forwarded to `create_table`
# @param sample_rows [Integer] forwarded to type detection
# @param csv_options [Hash] forwarded to type detection
# @return [String] the name of the imported table
def import_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {})
  name ||= parse_table_name(src)

  connection do |conn|
    create_table(src, name: name, force: force, sample_rows: sample_rows, csv_options: csv_options)

    # TODO: only Postgres is supported at the moment.
    # `raw_connection` is the adapter's public accessor for the underlying
    # driver connection, instead of reaching into an instance variable.
    pg = conn.raw_connection
    detected = column_types(src, sample_rows: sample_rows, csv_options: csv_options)
    columns = detected.keys.map { |column| %("#{column}") }
    pg_stmt = %{COPY "#{name}"(#{columns.join(',')}) FROM STDIN CSV}
    conn.logger.debug(pg_stmt)

    pg.copy_data(pg_stmt) do
      # stream the raw file, skipping the first (header) line
      File.foreach(src).with_index do |line, index|
        pg.put_copy_data(line) unless index.zero?
      end
    end

    pattern = config_auto_index_pattern
    index_table(name, pattern: /#{pattern}/) unless pattern.blank?
  end

  name
end
.index_table(table_name, pattern: nil) ⇒ Object
109 110 111 112 113 114 115 116 |
# File 'lib/dbx.rb', line 109
# Adds a single-column index for every column of `table_name` whose name
# matches `pattern`. Index names are `idx_<table>_<NN>`, where NN is the
# column's position left-padded with zeros to two characters.
#
# @param table_name [String] table to index
# @param pattern [Regexp, nil] matched against each column name with `=~`
# @return [Object] the result of the `connection` block
def index_table(table_name, pattern: nil)
  connection do |conn|
    conn.columns(table_name).each_with_index do |column, position|
      if column.name =~ pattern
        suffix = position.to_s.rjust(2, '0')
        conn.add_index(table_name, [column.name], name: "idx_#{table_name}_#{suffix}")
      end
    end
  end
end
.info(msg) ⇒ Object
50 51 52 |
# File 'lib/dbx.rb', line 50
# Writes `msg` to the `tty` stream wrapped in ANSI escape codes
# (`\e[33m` before the message, `\e[0m` reset after it).
#
# @param msg [#to_s] message to print
# @return [Object] the result of `puts` on the stream
def info(msg)
  highlighted = "\e[33m#{msg}\e[0m"
  tty.puts(highlighted)
end
.parse_table_name(src) ⇒ Object
64 65 66 |
# File 'lib/dbx.rb', line 64
# Derives a table name from a file path: the base name without its final
# extension, downcased.
#
# The extension is stripped by `File.basename` itself, so only the trailing
# extension is removed even when the same text also occurs earlier in the
# file name (e.g. `my.csvdata.csv` => `my.csvdata`).
#
# @param src [String] file path
# @return [String] table name
def parse_table_name(src)
  File.basename(src, '.*').downcase
end
.tty ⇒ Object
TODO what about windows?!
46 47 48 |
# File 'lib/dbx.rb', line 46
# Memoized output stream for log and info messages.
#
# Opens `/dev/tty` in append mode. When that device cannot be opened (no such
# path, e.g. on Windows, or no controlling terminal) the stream falls back to
# `$stderr` instead of raising.
#
# @return [IO] the stream messages are written to
def tty
  @tty ||= begin
    File.open('/dev/tty', 'a')
  rescue SystemCallError
    $stderr
  end
end