Class: Bio::CGRanges

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/cgranges.rb,
lib/bio/cgranges/version.rb,
ext/bio/cgranges/cgranges.c

Overview

Ruby interface to the cgranges C library: stores genomic intervals and answers overlap and containment queries on them.

Defined Under Namespace

Classes: IndexedError, NoIndexError

Constant Summary collapse

VERSION =
"0.0.4"

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Bio::CGRanges

Create a new cgranges object



99
100
101
102
103
104
105
106
107
108
109
110
# File 'ext/bio/cgranges/cgranges.c', line 99

/*
 * Initializer: stores the handle returned by cr_init() in this object's
 * data pointer and sets the @indexed instance variable to false.
 * Returns self.
 */
static VALUE
cgranges_init(VALUE self)
{
  /* Attach the newly created cgranges handle to the Ruby object. */
  DATA_PTR(self) = cr_init();

  /* The other methods in this file read @indexed to decide whether
   * adding or querying is currently permitted. */
  rb_ivar_set(self, rb_intern("@indexed"), Qfalse);

  return self;
}

Instance Method Details

#add(rb_ctg, rb_st, rb_en, rb_label) ⇒ Bio::CGRanges

Add a genomic interval to the cgranges object.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

  • label (Fixnum)

    The label of the interval.

Returns:



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'ext/bio/cgranges/cgranges.c', line 120

/*
 * Add one interval to the cgranges object.
 *
 * rb_ctg   - contig name, converted with StringValueCStr
 * rb_st    - start position, converted with NUM2INT32
 * rb_en    - end position, converted with NUM2INT32
 * rb_label - interval label, converted with NUM2INT32
 *
 * Returns self.
 * Raises IndexedError when @indexed is truthy, and RuntimeError when
 * cr_add() returns NULL.
 */
static VALUE
cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
{
  cgranges_t *cr = get_cgranges(self);

  if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
  {
    /* rb_raise() does not return, so no statement is needed after it. */
    rb_raise(rb_eIndexedError, "Cannot add intervals to an indexed CGRanges");
  }

  /*
   * Convert the numeric arguments before taking the C string pointer.
   * NOTE(review): NUM2INT32 is assumed to wrap Ruby's NUM2INT family, which
   * can call back into Ruby (to_int) for non-Integer arguments; holding a
   * pointer into rb_ctg across such a call risks it being invalidated if
   * that code modifies the string. Taking the pointer last, right before
   * cr_add(), avoids that window.
   */
  int32_t st = NUM2INT32(rb_st);
  int32_t en = NUM2INT32(rb_en);
  int32_t label = NUM2INT32(rb_label);
  char *ctg = StringValueCStr(rb_ctg);

  if (!cr_add(cr, ctg, st, en, label))
  {
    rb_raise(rb_eRuntimeError, "Error adding interval");
  }

  return self;
}

#contain(rb_ctg, rb_st, rb_en) ⇒ Array

Containment query.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

Returns:

  • (Array)

    An array of [contig, start, end, label] arrays.



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'ext/bio/cgranges/cgranges.c', line 273

/*
 * Containment query.
 *
 * rb_ctg - contig name, converted with StringValueCStr
 * rb_st  - start position, converted with NUM2INT32
 * rb_en  - end position, converted with NUM2INT32
 *
 * Returns an Array with one [contig, start, end, label] Array per hit
 * reported by cr_contain(). The contig element is the rb_ctg object itself.
 * Raises NoIndexError when @indexed is falsy, and RuntimeError when
 * cr_contain() returns a negative count.
 */
static VALUE
cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
{
  cgranges_t *cr = get_cgranges(self);

  if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
  {
    /* rb_raise() does not return, so no statement is needed after it. */
    rb_raise(rb_eNoIndexError, "CGRanges not indexed");
  }

  /*
   * Convert the numeric arguments before taking the C string pointer.
   * NOTE(review): NUM2INT32 is assumed to wrap Ruby's NUM2INT family, which
   * can call back into Ruby (to_int) for non-Integer arguments; holding a
   * pointer into rb_ctg across such a call risks it being invalidated if
   * that code modifies the string. Taking the pointer last, right before
   * the query, avoids that window.
   */
  int32_t st = NUM2INT32(rb_st);
  int32_t en = NUM2INT32(rb_en);
  char *ctg = StringValueCStr(rb_ctg);

  /* cr_contain() fills b with the hits; this function frees it. */
  int64_t *b = NULL;
  int64_t m_b = 0;
  int64_t n = cr_contain(cr, ctg, st, en, &b, &m_b);

  if (n < 0)
  {
    /* Release the buffer first: rb_raise() never comes back here. */
    free(b);
    rb_raise(rb_eRuntimeError, "Error finding contained");
  }

  VALUE result = rb_ary_new2(n);

  for (int64_t i = 0; i < n; i++)
  {
    /* b[i] is the handle handed to cr_start / cr_end / cr_label. */
    VALUE rb_intv = rb_ary_new3(
        4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
    rb_ary_push(result, rb_intv);
  }
  free(b);

  return result;
}

#count_contain(rb_ctg, rb_st, rb_en) ⇒ Fixnum

Get the number of contained intervals.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

Returns:

  • (Fixnum)

    The number of contained intervals.



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'ext/bio/cgranges/cgranges.c', line 324

/*
 * Count the intervals reported by a containment query.
 *
 * rb_ctg - contig name, converted with StringValueCStr
 * rb_st  - start position, converted with NUM2INT32
 * rb_en  - end position, converted with NUM2INT32
 *
 * Returns the count from cr_contain() converted with INT64_2NUM.
 * Raises NoIndexError when @indexed is falsy, and RuntimeError when
 * cr_contain() returns a negative count.
 */
static VALUE
cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
{
  cgranges_t *cr = get_cgranges(self);

  if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
  {
    /* rb_raise() does not return, so no statement is needed after it. */
    rb_raise(rb_eNoIndexError, "CGRanges not indexed");
  }

  /*
   * Convert the numeric arguments before taking the C string pointer.
   * NOTE(review): NUM2INT32 is assumed to wrap Ruby's NUM2INT family, which
   * can call back into Ruby (to_int) for non-Integer arguments; holding a
   * pointer into rb_ctg across such a call risks it being invalidated if
   * that code modifies the string. Taking the pointer last, right before
   * the query, avoids that window.
   */
  int32_t st = NUM2INT32(rb_st);
  int32_t en = NUM2INT32(rb_en);
  char *ctg = StringValueCStr(rb_ctg);

  int64_t *b = NULL;
  int64_t m_b = 0;
  int64_t n = cr_contain(cr, ctg, st, en, &b, &m_b);

  /* Only the count is used; release the hit buffer before any raise. */
  free(b);

  if (n < 0)
  {
    rb_raise(rb_eRuntimeError, "Error finding contained");
  }

  return INT64_2NUM(n);
}

#count_overlap(rb_ctg, rb_st, rb_en) ⇒ Fixnum

Get the number of overlapping intervals.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

Returns:

  • (Fixnum)

    The number of overlapping intervals.



231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'ext/bio/cgranges/cgranges.c', line 231

/*
 * Count the intervals reported by an overlap query.
 *
 * rb_ctg - contig name, converted with StringValueCStr
 * rb_st  - start position, converted with NUM2INT32
 * rb_en  - end position, converted with NUM2INT32
 *
 * Returns the count from cr_overlap() converted with INT64_2NUM.
 * Raises NoIndexError when @indexed is falsy, and RuntimeError when
 * cr_overlap() returns a negative count.
 */
static VALUE
cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
{
  cgranges_t *cr = get_cgranges(self);

  if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
  {
    /* rb_raise() does not return, so no statement is needed after it. */
    rb_raise(rb_eNoIndexError, "CGRanges not indexed");
  }

  /*
   * Convert the numeric arguments before taking the C string pointer.
   * NOTE(review): NUM2INT32 is assumed to wrap Ruby's NUM2INT family, which
   * can call back into Ruby (to_int) for non-Integer arguments; holding a
   * pointer into rb_ctg across such a call risks it being invalidated if
   * that code modifies the string. Taking the pointer last, right before
   * the query, avoids that window.
   */
  int32_t st = NUM2INT32(rb_st);
  int32_t en = NUM2INT32(rb_en);
  char *ctg = StringValueCStr(rb_ctg);

  int64_t *b = NULL;
  int64_t m_b = 0;
  int64_t n = cr_overlap(cr, ctg, st, en, &b, &m_b);

  /* Only the count is used; release the hit buffer before any raise. */
  free(b);

  if (n < 0)
  {
    rb_raise(rb_eRuntimeError, "Error finding overlaps");
  }

  return INT64_2NUM(n);
}

#coverage(ctg, rb_st, rb_en, mode: :overlap) ⇒ Array

Calculate breadth of coverage. This is a wrapper method for `coverage_overlap` and `coverage_contain`.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

  • mode (Symbol) (defaults to: :overlap)

    :overlap or :contain (default: :overlap)

Returns:

  • (Array)

    The breadth of coverage and the number of intervals.

See Also:



19
20
21
22
23
24
25
26
27
28
# File 'lib/bio/cgranges.rb', line 19

# Breadth of coverage for the given region.
#
# Forwards to +coverage_overlap+ when +mode+ is +:overlap+ (the default) and
# to +coverage_contain+ when it is +:contain+; any other value raises
# ArgumentError.
def coverage(ctg, rb_st, rb_en, mode: :overlap)
  # The Symbol stays on the left so the comparison is the Symbol's own,
  # exactly as a `case mode / when :overlap` branch would evaluate it.
  return coverage_overlap(ctg, rb_st, rb_en) if :overlap == mode
  return coverage_contain(ctg, rb_st, rb_en) if :contain == mode

  raise ArgumentError, "unknown mode: #{mode}"
end

#coverage_contain(rb_ctg, rb_st, rb_en) ⇒ Array

Calculate breadth of coverage (containment). Same as `coverage(contig, start, end, mode: :contain)`.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

Returns:

  • (Array)

    The breadth of coverage and the number of intervals.

See Also:



446
447
448
449
450
451
452
# File 'ext/bio/cgranges/cgranges.c', line 446

/*
 * Breadth of coverage, containment variant.
 *
 * Thin wrapper: forwards all arguments to cgranges_coverage() with 1 as the
 * final argument (cgranges_coverage_overlap passes 0 in that position) and
 * returns its result unchanged.
 */
static VALUE
cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
{
  return cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
}

#coverage_overlap(rb_ctg, rb_st, rb_en) ⇒ Array

Calculate breadth of coverage (overlap). Same as `coverage(contig, start, end, mode: :overlap)`.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

Returns:

  • (Array)

    The breadth of coverage and the number of intervals.

See Also:



429
430
431
432
433
434
435
# File 'ext/bio/cgranges/cgranges.c', line 429

/*
 * Breadth of coverage, overlap variant.
 *
 * Thin wrapper: forwards all arguments to cgranges_coverage() with 0 as the
 * final argument (cgranges_coverage_contain passes 1 in that position) and
 * returns its result unchanged.
 */
static VALUE
cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
{
  return cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
}

#index ⇒ Bio::CGRanges

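Build the index. Queries require an indexed object; once indexed, no further intervals can be added and indexing again raises IndexedError.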

Returns:



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'ext/bio/cgranges/cgranges.c', line 156

/*
 * Index the cgranges object by calling cr_index() on its handle, then set
 * @indexed to true.
 *
 * Returns self.
 * Raises IndexedError when @indexed is already truthy.
 */
static VALUE
cgranges_index(VALUE self)
{
  /* A second indexing attempt is rejected; rb_raise() does not return. */
  if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
  {
    rb_raise(rb_eIndexedError, "CGRanges already indexed");
  }

  cr_index(get_cgranges(self));

  /* Flip the flag that the add and query methods consult. */
  rb_ivar_set(self, rb_intern("@indexed"), Qtrue);

  return self;
}

#overlap(rb_ctg, rb_st, rb_en) ⇒ Array

Overlap query.

Parameters:

  • contig (String)

    The contig name

  • start (Fixnum)

    The start position of the interval.

  • end (Fixnum)

    The end position of the interval.

Returns:

  • (Array)

    An array of [contig, start, end, label] arrays.



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'ext/bio/cgranges/cgranges.c', line 180

/*
 * Overlap query.
 *
 * rb_ctg - contig name, converted with StringValueCStr
 * rb_st  - start position, converted with NUM2INT32
 * rb_en  - end position, converted with NUM2INT32
 *
 * Returns an Array with one [contig, start, end, label] Array per hit
 * reported by cr_overlap(). The contig element is the rb_ctg object itself.
 * Raises NoIndexError when @indexed is falsy, and RuntimeError when
 * cr_overlap() returns a negative count.
 */
static VALUE
cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
{
  cgranges_t *cr = get_cgranges(self);

  if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
  {
    /* rb_raise() does not return, so no statement is needed after it. */
    rb_raise(rb_eNoIndexError, "CGRanges not indexed");
  }

  /*
   * Convert the numeric arguments before taking the C string pointer.
   * NOTE(review): NUM2INT32 is assumed to wrap Ruby's NUM2INT family, which
   * can call back into Ruby (to_int) for non-Integer arguments; holding a
   * pointer into rb_ctg across such a call risks it being invalidated if
   * that code modifies the string. Taking the pointer last, right before
   * the query, avoids that window.
   */
  int32_t st = NUM2INT32(rb_st);
  int32_t en = NUM2INT32(rb_en);
  char *ctg = StringValueCStr(rb_ctg);

  /* cr_overlap() fills b with the hits; this function frees it. */
  int64_t *b = NULL;
  int64_t m_b = 0;
  int64_t n = cr_overlap(cr, ctg, st, en, &b, &m_b);

  if (n < 0)
  {
    /* Release the buffer first: rb_raise() never comes back here. */
    free(b);
    rb_raise(rb_eRuntimeError, "Error finding overlaps");
  }

  VALUE result = rb_ary_new2(n);

  for (int64_t i = 0; i < n; i++)
  {
    /* b[i] is the handle handed to cr_start / cr_end / cr_label. */
    VALUE rb_intv = rb_ary_new3(
        4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
    rb_ary_push(result, rb_intv);
  }
  free(b);

  return result;
}