Class: ScrapeCompanys

Inherits:
Object
  • Object
show all
Defined in:
lib/scrapper_bovespa/scrapper.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ScrapeCompanys

Returns a new instance of ScrapeCompanys.



45
46
47
48
# File 'lib/scrapper_bovespa/scrapper.rb', line 45

# Stores the two URL prefixes used by the scraper.
#
# @base_query_address is concatenated with a CVM code by get_company_data;
# @code_source is concatenated with an initial character by get_cvms.
def initialize
	@base_query_address = "http://bvmf.bmfbovespa.com.br/pt-br/mercados/acoes/empresas/ExecutaAcaoConsultaInfoEmp.asp?CodCVM="
	@code_source = "http://cvmweb.cvm.gov.br/SWB/Sistemas/SCW/CPublica/CiaAb/FormBuscaCiaAbOrdAlf.aspx?LetraInicial="
end

Instance Method Details

#extract_data_composicao_capital_social(source) ⇒ Object



68
69
70
71
# File 'lib/scrapper_bovespa/scrapper.rb', line 68

# Returns the first match in +source+ without its first three characters.
#
# +source+ is an Array of matched strings (the result of String#scan).
# get_company_data passes matches of / - [0-9]+\/[0-9]+\/[0-9]+/, so the
# three dropped characters are the leading " - " separator.
#
# Returns a String, or nil when +source+ contains no match (previously this
# raised NoMethodError on nil).
def extract_data_composicao_capital_social(source)
	match = source[0]
	return nil if match.nil?

	match[3..-1]
end

#extract_many_td(source) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/scrapper_bovespa/scrapper.rb', line 93

# Collects the text of the <td> cells that follow a row label.
#
# +source+ is an Array of matched strings (the result of String#scan); only
# the first match is used. The match is split on CRLF, the first line (the
# row label) is discarded, and each remaining line contributes the text
# found between its opening tag and its last "</td>".
#
# The start of the text is located after the last `">` on the line (a tag
# carrying a quoted attribute) or, failing that, after the last bare "<td>".
# Lines without "</td>" or without either opening marker are skipped.
#
# The previous implementation skipped only 4 of the 5 characters of the
# reversed "</td>" marker, so every value carried a trailing "<"; it also
# raised NoMethodError when +source+ was empty or a line lacked a marker.
#
# Returns an Array of Strings (empty when +source+ contains no match).
def extract_many_td(source)
	match = source[0]
	return [] if match.nil?

	match.split("\r\n").drop(1).each_with_object([]) do |line, output|
		closing = line.rindex("</td>")
		next if closing.nil?

		quoted = line.rindex("\">")
		bare = line.rindex("<td>")
		start = if quoted
			quoted + 2
		elsif bare
			bare + 4
		end
		next if start.nil?

		output << line[start...closing]
	end
end

#extract_many_th(source) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/scrapper_bovespa/scrapper.rb', line 73

# Collects the text of the <th> cells that follow a table title.
#
# +source+ is an Array of matched strings (the result of String#scan); only
# the first match is used. The match is split on CRLF, the first line is
# discarded, and each remaining line that contains "</th>" contributes the
# text found between its opening tag and its last "</th>".
#
# The start of the text is located after the last `">` on the line (a tag
# carrying a quoted attribute) or, failing that, after the last bare "<th>".
# Lines without "</th>" or without either opening marker are skipped.
#
# Two defects of the previous implementation are fixed here:
# * only 4 of the 5 characters of the reversed "</th>" marker were skipped,
#   so every heading carried a trailing "<";
# * the rescue fallback searched for ">h", which matches the closing tag
#   itself, and never un-reversed the slice, so it returned reversed markup.
#
# Returns an Array of Strings (empty when +source+ contains no match).
def extract_many_th(source)
	match = source[0]
	return [] if match.nil?

	match.split("\r\n").drop(1).each_with_object([]) do |line, output|
		closing = line.rindex("</th>")
		next if closing.nil?

		quoted = line.rindex("\">")
		bare = line.rindex("<th>")
		start = if quoted
			quoted + 2
		elsif bare
			bare + 4
		end
		next if start.nil?

		output << line[start...closing]
	end
end

#extract_simple_text(source) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/scrapper_bovespa/scrapper.rb', line 50

# Extracts the text of the last table cell in the first match of +source+.
#
# +source+ is an Array of matched strings (the result of String#scan). The
# returned text ends right before the last "</td>" and starts right after
# the last bare "<td>" or, when there is no bare "<td>", right after the
# last `">` (a tag carrying a quoted attribute).
#
# The previous implementation reached the `">` case through a rescue and
# forgot to exclude the marker, so such values started with a stray ">".
# The lookup is now done with explicit checks instead of exceptions.
#
# Returns a String, or nil when +source+ contains no match or the match has
# no "</td>" / no opening marker (previously NoMethodError on nil).
def extract_simple_text(source)
	text = source[0]
	return nil if text.nil?

	closing = text.rindex("</td>")
	return nil if closing.nil?

	bare = text.rindex("<td>")
	quoted = text.rindex("\">")
	start = if bare
		bare + 4
	elsif quoted
		quoted + 2
	end
	return nil if start.nil?

	text[start...closing]
end

#extract_site(source) ⇒ Object



62
63
64
65
66
# File 'lib/scrapper_bovespa/scrapper.rb', line 62

# Extracts the link text of the last anchor in the first match of +source+.
#
# +source+ is an Array of matched strings (the result of String#scan). The
# returned text starts right after the last `">` in the match and ends right
# before the last "</a>".
#
# Returns a String, or nil when +source+ contains no match or either marker
# is missing (previously this raised NoMethodError on nil).
def extract_site(source)
	text = source[0]
	return nil if text.nil?

	closing = text.rindex("</a>")
	opening = text.rindex("\">")
	return nil if closing.nil? || opening.nil?

	text[(opening + 2)...closing]
end

#get_company_data(cvm) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/scrapper_bovespa/scrapper.rb', line 106

# Downloads the page at @base_query_address + +cvm+ and returns a Hash with
# the fields scraped from it.
#
# Each field is obtained by scanning the page body with a regular expression
# and handing the matches to one of the extract_* helpers. Top-level keys:
# :nome, :cnpj, :site, :balanco_patrimonial, :demonstracao_do_resultado,
# :demonstracao_do_fluxo_de_caixa, :posicao_acionaria and
# :composicao_capital_social.
#
# NOTE(review): the key :patromonio_liquido is spelled this way on purpose to
# keep the returned structure unchanged for existing callers.
def get_company_data(cvm)
	page = Net::HTTP.get(URI("#{@base_query_address}#{cvm}"))

	{
		nome: extract_simple_text(page.scan(/Nome de Preg.+\r\n.+\<\/td\>/)),
		cnpj: extract_simple_text(page.scan(/CNPJ.+\r\n.+\<\/td\>/)),
		site: extract_site(page.scan(/Site:\<\/td\>\r\n.+\<\/a\>/)),

		balanco_patrimonial: {
			periodo: extract_many_th(page.scan(/Balan.+\r\n.+\r\n.+\r\n/)),
			ativo_permanente: extract_many_td(page.scan(/Ativo Permanente.+\r\n.+\r\n.+\r\n/)),
			ativo_total: extract_many_td(page.scan(/Ativo Total.+\r\n.+\r\n.+\r\n/)),
			patromonio_liquido: extract_many_td(page.scan(/Patrim.+nio L.+quido.+\r\n.+\r\n.+\r\n/))
		},

		demonstracao_do_resultado: {
			periodo: extract_many_th(page.scan(/Demonstra.+o do Resultado.+\r\n.+\r\n.+\r\n.+\r\n/)),
			receitas_da_intermediacao_financeira: extract_many_td(page.scan(/Receitas da Intermedia.+ Financeira.+\r\n.+\r\n.+\r\n/)),
			resultado_bruto_de_intermediacao_financeira: extract_many_td(page.scan(/Resultado Bruto da Intermedia.+ Financeira.+\r\n.+\r\n.+\r\n/)),
			resultado_operacional: extract_many_td(page.scan(/Resultado Operacional.+\r\n.+\r\n.+\r\n/)),
			lucro_liquido: extract_many_td(page.scan(/Lucro .+ L.+quido.+\r\n.+\r\n.+\r\n/))
		},

		demonstracao_do_fluxo_de_caixa: {
			periodo: extract_many_th(page.scan(/Demonstra.+ do Fluxo de Caixa.+\r\n.+\r\n.+\r\n/)),
			atividades_operacionais: extract_many_td(page.scan(/Atividades Operacionais.+\r\n.+\r\n.+\r\n/)),
			atividades_de_investimento: extract_many_td(page.scan(/Atividades de Investimento.+\r\n.+\r\n.+\r\n/)),
			atividades_de_financiamento: extract_many_td(page.scan(/Atividades de Financiamento.+\r\n.+\r\n.+\r\n/)),
			variacao_cambial_sobre_caixa_e_equivalentes: extract_many_td(page.scan(/Varia.+o Cambial sobre Caixa e Equivalentes.+\r\n.+\r\n.+\r\n/)),
			aumento_de_caixa_e_equivalentes: extract_many_td(page.scan(/Aumento .+ de Caixa e Equivalentes.+\r\n.+\r\n.+\r\n/))
		},

		posicao_acionaria: {
			headings: extract_many_th(page.scan(/\<th\>Nome.+\r\n.+\r\n.+\r\n.+\r\n/)),
			outros: extract_many_td(page.scan(/\<td\>Outros.+\n.+\n.+\n.+\n/)),
			total: extract_many_td(page.scan(/\<td\>Total.+\n.+\<td.+\n.+\<td.+\n.+\<td.+\n/))
		},

		composicao_capital_social: {
			data: extract_data_composicao_capital_social(page.scan(/ - [0-9]+\/[0-9]+\/[0-9]+/)),
			ordinarias: extract_simple_text(page.scan(/Ordin.+rias.+\n.+\n/)),
			preferenciais: extract_simple_text(page.scan(/Preferenciais.+\n.+\n/)),
			total: extract_simple_text(page.scan(/Total.+\n.+\n.+\<\/tr/))
		}
	}
end

#get_cvms ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/scrapper_bovespa/scrapper.rb', line 148

# Downloads one listing page per initial character (@code_source + initial)
# and returns every numeric anchor text found, converted to Integer.
#
# Anchor texts made of a single digit are ignored. The number of collected
# codes is printed to standard output before the Array is returned.
def get_cvms
	# The initials are requested in this exact order (note "Y" before "X"),
	# which determines the order of the returned codes.
	initials = %w[A B C D E F G H I J K L M N O P Q R S T U V W Y X Z 0 1 2 3 4 5 6 7 8 9]

	cvms = initials.flat_map do |initial|
		anchors = Net::HTTP.get(URI("#{@code_source}#{initial}")).scan(/\>[0-9]+\<\/a\>/)

		# Each match looks like ">123</a>": drop the leading ">" and the trailing "</a>".
		anchors
			.map { |anchor| anchor[1..-5] }
			.select { |digits| digits.length > 1 }
			.map { |digits| digits.to_i }
	end

	puts cvms.size
	cvms
end