Class: Bio::TwoBit

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/twobit.rb,
lib/bio/twobit/version.rb,
lib/bio/twobit/metadata.rb,
lib/bio/twobit/cache_path.rb,
lib/bio/twobit/downloader.rb,
lib/bio/twobit/references/cb3.rb,
lib/bio/twobit/references/ci2.rb,
lib/bio/twobit/references/dm6.rb,
lib/bio/twobit/references/dp3.rb,
lib/bio/twobit/references/fr2.rb,
lib/bio/twobit/references/hs1.rb,
lib/bio/twobit/references/mm9.rb,
lib/bio/twobit/references/rn6.rb,
lib/bio/twobit/references/ce11.rb,
lib/bio/twobit/references/hg19.rb,
lib/bio/twobit/references/hg38.rb,
lib/bio/twobit/references/mm10.rb,
lib/bio/twobit/references/mm39.rb,
lib/bio/twobit/reference_genome.rb,
lib/bio/twobit/references/caepb2.rb,
lib/bio/twobit/references/ailmel1.rb,
lib/bio/twobit/references/allmis1.rb,
lib/bio/twobit/references/anocar2.rb,
lib/bio/twobit/references/anogam3.rb,
lib/bio/twobit/references/apimel2.rb,
lib/bio/twobit/references/aplcal1.rb,
lib/bio/twobit/references/aptman1.rb,
lib/bio/twobit/references/aquchr2.rb,
lib/bio/twobit/references/balacu1.rb,
lib/bio/twobit/references/bostau7.rb,
lib/bio/twobit/references/braflo1.rb,
lib/bio/twobit/references/caejap1.rb,
lib/bio/twobit/references/caerem3.rb,
lib/bio/twobit/references/caljac4.rb,
lib/bio/twobit/references/calmil1.rb,
lib/bio/twobit/references/canfam4.rb,
lib/bio/twobit/references/canfam6.rb,
lib/bio/twobit/references/cersim1.rb,
lib/bio/twobit/references/chlsab2.rb,
lib/bio/twobit/references/chohof1.rb,
lib/bio/twobit/references/chrpic1.rb,
lib/bio/twobit/references/dasnov3.rb,
lib/bio/twobit/references/dipord1.rb,
lib/bio/twobit/references/droana2.rb,
lib/bio/twobit/references/droere1.rb,
lib/bio/twobit/references/drogri1.rb,
lib/bio/twobit/references/dromoj2.rb,
lib/bio/twobit/references/droper1.rb,
lib/bio/twobit/references/drosec1.rb,
lib/bio/twobit/references/drosim1.rb,
lib/bio/twobit/references/drovir2.rb,
lib/bio/twobit/references/droyak2.rb,
lib/bio/twobit/references/ebovir3.rb,
lib/bio/twobit/references/echtel2.rb,
lib/bio/twobit/references/equcab3.rb,
lib/bio/twobit/references/erieur2.rb,
lib/bio/twobit/references/felcat9.rb,
lib/bio/twobit/references/gadmor1.rb,
lib/bio/twobit/references/galgal6.rb,
lib/bio/twobit/references/geofor1.rb,
lib/bio/twobit/references/gorgor6.rb,
lib/bio/twobit/references/hetgla2.rb,
lib/bio/twobit/references/latcha1.rb,
lib/bio/twobit/references/loxafr3.rb,
lib/bio/twobit/references/maceug2.rb,
lib/bio/twobit/references/macfas5.rb,
lib/bio/twobit/references/manpen1.rb,
lib/bio/twobit/references/melgal1.rb,
lib/bio/twobit/references/melund1.rb,
lib/bio/twobit/references/micmur2.rb,
lib/bio/twobit/references/mondom5.rb,
lib/bio/twobit/references/myoluc2.rb,
lib/bio/twobit/references/nanpar1.rb,
lib/bio/twobit/references/naslar1.rb,
lib/bio/twobit/references/neosch1.rb,
lib/bio/twobit/references/nomleu3.rb,
lib/bio/twobit/references/ochpri3.rb,
lib/bio/twobit/references/orenil2.rb,
lib/bio/twobit/references/ornana2.rb,
lib/bio/twobit/references/orycun2.rb,
lib/bio/twobit/references/orylat2.rb,
lib/bio/twobit/references/otogar3.rb,
lib/bio/twobit/references/oviari4.rb,
lib/bio/twobit/references/panpan3.rb,
lib/bio/twobit/references/pantro6.rb,
lib/bio/twobit/references/papanu4.rb,
lib/bio/twobit/references/papham1.rb,
lib/bio/twobit/references/petmar3.rb,
lib/bio/twobit/references/ponabe2.rb,
lib/bio/twobit/references/ponabe3.rb,
lib/bio/twobit/references/pripac1.rb,
lib/bio/twobit/references/procap1.rb,
lib/bio/twobit/references/ptevam1.rb,
lib/bio/twobit/references/saccer3.rb,
lib/bio/twobit/references/saibol1.rb,
lib/bio/twobit/references/sarhar1.rb,
lib/bio/twobit/references/sorara2.rb,
lib/bio/twobit/references/spetri2.rb,
lib/bio/twobit/references/strpur2.rb,
lib/bio/twobit/references/taegut2.rb,
lib/bio/twobit/references/tarsyr2.rb,
lib/bio/twobit/references/tetnig2.rb,
lib/bio/twobit/references/thasir1.rb,
lib/bio/twobit/references/triman1.rb,
lib/bio/twobit/references/tupbel1.rb,
lib/bio/twobit/references/turtru2.rb,
lib/bio/twobit/references/uusfur1.rb,
lib/bio/twobit/references/vicpac2.rb,
lib/bio/twobit/references/wuhcor1.rb,
lib/bio/twobit/references/danrer10.rb,
lib/bio/twobit/references/danrer11.rb,
lib/bio/twobit/references/rhemac10.rb,
lib/bio/twobit/references/susscr11.rb,
lib/bio/twobit/references/xentro10.rb,
lib/bio/twobit/references/crigrichov2.rb,
ext/bio/twobit/twobit.c

Overview

Reader for .2bit files (e.g., those distributed by the UCSC Genome Browser).

Direct Known Subclasses

ReferenceGenome

Defined Under Namespace

Classes: AilMel1, AllMis1, AnoCar2, AnoGam3, ApiMel2, AplCal1, AptMan1, AquChr2, BalAcu1, BosTau7, BraFlo1, CachePath, CaeJap1, CaePb2, CaeRem3, CalJac4, CalMil1, CanFam4, CanFam6, Cb3, Ce11, CerSim1, ChlSab2, ChoHof1, ChrPic1, Ci2, CriGriChoV2, DanRer10, DanRer11, DasNov3, DipOrd1, Dm6, Downloader, Dp3, DroAna2, DroEre1, DroGri1, DroMoj2, DroPer1, DroSec1, DroSim1, DroVir2, DroYak2, EboVir3, EchTel2, EquCab3, EriEur2, FelCat9, Fr2, GadMor1, GalGal6, GeoFor1, GorGor6, HetGla2, Hg19, Hg38, Hs1, LatCha1, LoxAfr3, MacEug2, MacFas5, ManPen1, MelGal1, MelUnd1, Metadata, MicMur2, Mm10, Mm39, Mm9, MonDom5, MyoLuc2, NanPar1, NasLar1, NeoSch1, NomLeu3, OchPri3, OreNil2, OrnAna2, OryCun2, OryLat2, OtoGar3, OviAri4, PanPan3, PanTro6, PapAnu4, PapHam1, PetMar3, PonAbe2, PonAbe3, PriPac1, ProCap1, PteVam1, ReferenceGenome, RheMac10, Rn6, SacCer3, SaiBol1, SarHar1, SorAra2, SpeTri2, StrPur2, SusScr11, TaeGut2, TarSyr2, TetNig2, ThaSir1, TriMan1, TupBel1, TurTru2, UusFur1, VicPac2, WuhCor1, XenTro10

Constant Summary collapse

VERSION =
"0.2.1"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fname, masked: false) ⇒ TwoBit

Returns a new instance of TwoBit.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/bio/twobit.rb', line 29

# Opens a .2bit file.
#
# @param fname [String, #to_path] file to open; anything responding to
#   +to_path+ is converted through it first
# @param masked [Boolean] any truthy value is stored as +true+ and handed to
#   the native layer as 1, otherwise +false+ / 0
# @raise [RuntimeError] if a block is given (blocks are only handled by +open+)
def initialize(fname, masked: false)
  raise "TwoBit::new() does not take block; use TwoBit::open() instead" if block_given?

  fname = fname.to_path if fname.respond_to?(:to_path)
  @fname = fname
  # Normalise to a strict boolean so masked? never leaks an arbitrary object.
  @masked = masked ? true : false
  # initialize_raw is not defined in the Ruby listing; it receives the mask
  # flag as an Integer (1 or 0).
  initialize_raw(fname, @masked ? 1 : 0)
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



13
14
15
# File 'lib/bio/twobit.rb', line 13

# Reader for the +metadata+ attribute.
#
# @return [Object] the current value of +@metadata+ (nil when it was never set)
def metadata
  @metadata
end

Class Method Details

.open(*args, **kwargs) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
# File 'lib/bio/twobit.rb', line 16

# Builds an instance with +new+, forwarding every argument unchanged.
#
# Without a block the fresh instance is returned as is. With a block the
# instance is yielded and then closed, even when the block raises. The
# (now closed) instance is returned, not the block's value.
#
# @return [Object] the instance created by +new+
def open(*args, **kwargs)
  twobit = new(*args, **kwargs)

  if block_given?
    begin
      yield twobit
    ensure
      twobit.close
    end
  end

  twobit
end

Instance Method Details

#bases(chrom, start = 0, stop = 0, fraction: true) ⇒ Object

Raises:

  • (ArgumentError)


63
64
65
66
67
68
# File 'lib/bio/twobit.rb', line 63

# Validates the coordinates and delegates to +bases_raw+.
#
# @param chrom [Object] forwarded unchanged
# @param start [Numeric] must not be negative
# @param stop [Numeric] must not be negative
# @param fraction [Boolean] forwarded as 1 when truthy and 0 otherwise;
#   presumably selects fractions versus raw counts - confirm in the C extension
# @return [Object] whatever +bases_raw+ returns
# @raise [ArgumentError] if +start+ or +stop+ is negative
def bases(chrom, start = 0, stop = 0, fraction: true)
  if start.negative?
    raise ArgumentError, "negative start position"
  elsif stop.negative?
    raise ArgumentError, "negative stop position"
  end

  fraction_flag = fraction ? 1 : 0
  bases_raw(chrom, start, stop, fraction_flag)
end

#chroms ⇒ Object



215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'ext/bio/twobit/twobit.c', line 215

/*
 * Build a Ruby Hash with one entry per chromosome listed in the file header:
 * the key is the chromosome name (tb->cl->chrom[i]) as a String, the value
 * its size (tb->idx->size[i]) converted with UINT32_2NUM.
 *
 * Raises RuntimeError when getTwoBit() yields no handle.
 */
static VALUE
twobit_chroms(VALUE self)
{
	TwoBit *handle = getTwoBit(self);
	VALUE result;
	VALUE length;
	uint32_t n;

	if (handle == NULL)
	{
		rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
		return Qnil;
	}

	result = rb_hash_new();

	n = 0;
	while (n < handle->hdr->nChroms)
	{
		length = UINT32_2NUM(handle->idx->size[n]);
		if (!length)
		{
			/* Defensive check kept from the original conversion path. */
			rb_raise(rb_eRuntimeError, "Received an error while adding an item to the output hash!");
			return Qnil;
		}
		rb_hash_aset(result, rb_str_new2(handle->cl->chrom[n]), length);
		n++;
	}

	return result;
}

#clear_cache! ⇒ Object



84
85
86
# File 'lib/bio/twobit.rb', line 84

# Calls +remove+ on the object returned by +cache_path+ and returns its result.
#
# NOTE(review): +cache_path+ is not defined in this listing; presumably it is
# provided by ReferenceGenome / CachePath - confirm before calling this on a
# plain TwoBit instance.
def clear_cache!
  cache_path.remove
end

#close ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
# File 'ext/bio/twobit/twobit.c', line 112

/*
 * Close the underlying 2bit handle, if there is one, and clear DATA_PTR so
 * later calls see no handle. Calling it again is a no-op. Always returns nil.
 */
static VALUE
twobit_close(VALUE self)
{
	TwoBit *handle = getTwoBit(self);

	if (handle == NULL)
		return Qnil;

	twobitClose(handle);
	DATA_PTR(self) = NULL;

	return Qnil;
}

#closed? ⇒ Boolean

Returns:

  • (Boolean)


125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'ext/bio/twobit/twobit.c', line 125

/*
 * Report whether the object still has a 2bit handle:
 * false while getTwoBit() yields a handle, true otherwise.
 */
static VALUE
twobit_closed_question_mark(VALUE self)
{
	return getTwoBit(self) ? Qfalse : Qtrue;
}

#hard_masked_blocks(chrom, start = 0, stop = 0) ⇒ Object

Raises:

  • (ArgumentError)


70
71
72
73
74
75
# File 'lib/bio/twobit.rb', line 70

# Validates the coordinates and delegates to +hard_masked_blocks_raw+.
#
# @param chrom [Object] forwarded unchanged
# @param start [Numeric] must not be negative
# @param stop [Numeric] must not be negative
# @return [Object] whatever +hard_masked_blocks_raw+ returns
# @raise [ArgumentError] if +start+ or +stop+ is negative
def hard_masked_blocks(chrom, start = 0, stop = 0)
  if start.negative?
    raise ArgumentError, "negative start position"
  elsif stop.negative?
    raise ArgumentError, "negative stop position"
  end

  hard_masked_blocks_raw(chrom, start, stop)
end

#info ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'ext/bio/twobit/twobit.c', line 139

/*
 * Summarise the open 2bit file as a Ruby Hash with String keys:
 *
 *   "file_size"          - tb->sz
 *   "nChroms"            - number of chromosomes in the header
 *   "sequence_length"    - sum of all chromosome sizes
 *   "hard_masked_length" - sum of all N-block sizes
 *   "soft_masked_length" - sum of all mask-block sizes; only present when
 *                          tb->idx->maskBlockStart is set
 *
 * The totals are accumulated in 64 bits: each per-chromosome value is a
 * 32-bit quantity, but their sum over a whole assembly can exceed
 * UINT32_MAX and would silently wrap in a uint32_t accumulator.
 *
 * Raises RuntimeError when getTwoBit() yields no handle.
 */
static VALUE
twobit_info(VALUE self)
{
	TwoBit *tb = getTwoBit(self);

	if (!tb)
	{
		rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
		return Qnil;
	}

	uint32_t i, j;
	uint64_t total;
	VALUE val;
	VALUE info = rb_hash_new();

	// file size
	val = UINT64_2NUM(tb->sz);
	if (!val)
		goto error;
	rb_hash_aset(info, rb_str_new2("file_size"), val);

	// number of chromosomes
	val = UINT32_2NUM(tb->hdr->nChroms);
	if (!val)
		goto error;
	rb_hash_aset(info, rb_str_new2("nChroms"), val);

	// sequence length
	total = 0;
	for (i = 0; i < tb->hdr->nChroms; i++)
	{
		total += tb->idx->size[i];
	}
	val = UINT64_2NUM(total);
	if (!val)
		goto error;
	rb_hash_aset(info, rb_str_new2("sequence_length"), val);

	// hard-masked length
	total = 0;
	for (i = 0; i < tb->hdr->nChroms; i++)
	{
		for (j = 0; j < tb->idx->nBlockCount[i]; j++)
		{
			total += tb->idx->nBlockSizes[i][j];
		}
	}
	val = UINT64_2NUM(total);
	if (!val)
		goto error;
	rb_hash_aset(info, rb_str_new2("hard_masked_length"), val);

	// soft-masked length (only when mask blocks are available)
	if (tb->idx->maskBlockStart)
	{
		total = 0;
		for (i = 0; i < tb->hdr->nChroms; i++)
		{
			for (j = 0; j < tb->idx->maskBlockCount[i]; j++)
			{
				total += tb->idx->maskBlockSizes[i][j];
			}
		}
		val = UINT64_2NUM(total);
		if (!val)
			goto error;
		rb_hash_aset(info, rb_str_new2("soft_masked_length"), val);
	}

	return info;

error:
	rb_raise(rb_eRuntimeError, "Received an error while gathering information on the 2bit file!");
	return Qnil;
}

#masked? ⇒ Boolean

Returns:

  • (Boolean)


48
49
50
# File 'lib/bio/twobit.rb', line 48

# Reports the mask setting chosen at construction time.
#
# @return [Boolean] +true+ when the instance was created with a truthy
#   +masked:+ argument, +false+ otherwise
def masked?
  @masked
end

#path ⇒ Object



44
45
46
# File 'lib/bio/twobit.rb', line 44

# Returns the file name stored by the constructor, i.e. the +fname+ argument
# after it has been passed through +to_path+ when it responds to that method.
#
# @return [Object] the stored file name
def path
  @fname
end

#sequence(chrom, start = 0, stop = 0) ⇒ Object Also known as: seq

Since “end” is a reserved word in Ruby, use “stop” instead.

Raises:

  • (ArgumentError)


54
55
56
57
58
59
# File 'lib/bio/twobit.rb', line 54

# Validates the coordinates and delegates to +sequence_raw+.
#
# The end coordinate is called +stop+ because +end+ is a reserved word in Ruby.
#
# @param chrom [Object] forwarded unchanged
# @param start [Numeric] must not be negative
# @param stop [Numeric] must not be negative
# @return [Object] whatever +sequence_raw+ returns
# @raise [ArgumentError] if +start+ or +stop+ is negative
def sequence(chrom, start = 0, stop = 0)
  if start.negative?
    raise ArgumentError, "negative start position"
  end

  if stop.negative?
    raise ArgumentError, "negative stop position"
  end

  sequence_raw(chrom, start, stop)
end

#soft_masked_blocks(chrom, start = 0, stop = 0) ⇒ Object

Raises:

  • (ArgumentError)


77
78
79
80
81
82
# File 'lib/bio/twobit.rb', line 77

# Validates the coordinates and delegates to +soft_masked_blocks_raw+.
#
# @param chrom [Object] forwarded unchanged
# @param start [Numeric] must not be negative
# @param stop [Numeric] must not be negative
# @return [Object] whatever +soft_masked_blocks_raw+ returns
# @raise [ArgumentError] if +start+ or +stop+ is negative
def soft_masked_blocks(chrom, start = 0, stop = 0)
  if start.negative?
    raise ArgumentError, "negative start position"
  end

  if stop.negative?
    raise ArgumentError, "negative stop position"
  end

  soft_masked_blocks_raw(chrom, start, stop)
end