class DBM

Introduction

The DBM class provides a wrapper to a Unix-style dbm or Database Manager library.

Dbm databases do not have tables or columns; they are simple key-value data stores, like a Ruby Hash except not resident in RAM. Keys and values must be strings.

The exact library used depends on how Ruby was compiled. It could be any of the following:

All of these dbm implementations have their own Ruby interfaces available, which provide richer (but varying) APIs.

Cautions

Before you decide to use DBM, there are some issues you should consider:

Given the above cautions, DBM is not a good choice for long term storage of important data. It is probably best used as a fast and easy alternative to a Hash for processing large amounts of data.

Example

require 'dbm'
db = DBM.open('rfcs', 0666, DBM::WRCREAT)
db['822'] = 'Standard for the Format of ARPA Internet Text Messages'
db['1123'] = 'Requirements for Internet Hosts - Application and Support'
db['3068'] = 'An Anycast Prefix for 6to4 Relay Routers'
puts db['822']

Constants

NEWDB

Indicates that dbm_open() should open the database in read/write mode, create it if it does not already exist, and delete all contents if it does already exist.

READER

Indicates that dbm_open() should open the database in read-only mode

VERSION

Identifies ndbm library version.

Examples:

  • “ndbm (4.3BSD)”

  • “Berkeley DB 4.8.30: (April 9, 2010)”

  • “Berkeley DB (unknown)” (4.4BSD, maybe)

  • “GDBM version 1.8.3. 10/15/2002 (built Jul 1 2011 12:32:45)”

  • “QDBM 1.8.78”

WRCREAT

Indicates that dbm_open() should open the database in read/write mode, and create it if it does not already exist

WRITER

Indicates that dbm_open() should open the database in read/write mode

Public Class Methods

new(filename[, mode[, flags]]) → dbm click to toggle source

Open a dbm database with the specified name, which can include a directory path. Any file extensions needed will be supplied automatically by the dbm library. For example, Berkeley DB appends '.db', and GNU gdbm uses two physical files with extensions '.dir' and '.pag'.

The mode should be an integer, as for Unix chmod.

Flags should be one of READER, WRITER, WRCREAT or NEWDB.

static VALUE
fdbm_initialize(int argc, VALUE *argv, VALUE obj)
{
    VALUE file, vmode, vflags;
    DBM *dbm;
    struct dbmdata *dbmp;
    int mode, flags = 0;

    TypedData_Get_Struct(obj, struct dbmdata, &dbm_type, dbmp);
    if (rb_scan_args(argc, argv, "12", &file, &vmode, &vflags) == 1) {
        mode = 0666;           /* default value */
    }
    else if (NIL_P(vmode)) {
        mode = -1;             /* return nil if DB not exist */
    }
    else {
        mode = NUM2INT(vmode);
    }

    if (!NIL_P(vflags))
        flags = NUM2INT(vflags);

    FilePathValue(file);

    /*
     * Note:
     * gdbm 1.10 works with O_CLOEXEC.  gdbm 1.9.1 silently ignore it.
     */
#ifndef O_CLOEXEC
#   define O_CLOEXEC 0
#endif

    if (flags & RUBY_DBM_RW_BIT) {
        flags &= ~RUBY_DBM_RW_BIT;
        dbm = dbm_open(RSTRING_PTR(file), flags|O_CLOEXEC, mode);
    }
    else {
        dbm = 0;
        if (mode >= 0) {
            dbm = dbm_open(RSTRING_PTR(file), O_RDWR|O_CREAT|O_CLOEXEC, mode);
        }
        if (!dbm) {
            dbm = dbm_open(RSTRING_PTR(file), O_RDWR|O_CLOEXEC, 0);
        }
        if (!dbm) {
            dbm = dbm_open(RSTRING_PTR(file), O_RDONLY|O_CLOEXEC, 0);
        }
    }

    if (dbm) {
        /*
         * History of dbm_pagfno() and dbm_dirfno() in ndbm and its compatibles.
         * (dbm_pagfno() and dbm_dirfno() is not standardized.)
         *
         * 1986: 4.3BSD provides ndbm.
         *       It provides dbm_pagfno() and dbm_dirfno() as macros.
         * 1991: gdbm-1.5 provides them as functions.
         *       They returns a same descriptor.
         *       (Earlier releases may have the functions too.)
         * 1991: Net/2 provides Berkeley DB.
         *       It doesn't provide dbm_pagfno() and dbm_dirfno().
         * 1992: 4.4BSD Alpha provides Berkeley DB with dbm_dirfno() as a function.
         *       dbm_pagfno() is a macro as DBM_PAGFNO_NOT_AVAILABLE.
         * 1997: Berkeley DB 2.0 is released by Sleepycat Software, Inc.
         *       It defines dbm_pagfno() and dbm_dirfno() as macros.
         * 2011: gdbm-1.9 creates a separate dir file.
         *       dbm_pagfno() and dbm_dirfno() returns different descriptors.
         */
#if defined(HAVE_DBM_PAGFNO)
        rb_fd_fix_cloexec(dbm_pagfno(dbm));
#endif
#if defined(HAVE_DBM_DIRFNO)
        rb_fd_fix_cloexec(dbm_dirfno(dbm));
#endif

#if defined(RUBYDBM_DB_HEADER) && defined(HAVE_TYPE_DBC)
        /* Disable Berkeley DB error messages such as:
         * DB->put: attempt to modify a read-only database */
        ((DBC*)dbm)->dbp->set_errfile(((DBC*)dbm)->dbp, NULL);
#endif
    }

    if (!dbm) {
        if (mode == -1) return Qnil;
        rb_sys_fail_str(file);
    }

    if (dbmp->di_dbm)
        dbm_close(dbmp->di_dbm);
    dbmp->di_dbm = dbm;
    dbmp->di_size = -1;

    return obj;
}
open(filename[, mode[, flags]]) → dbm click to toggle source
open(filename[, mode[, flags]]) {|dbm| block}

Open a dbm database and yields it if a block is given. See also DBM.new.

static VALUE
fdbm_s_open(int argc, VALUE *argv, VALUE klass)
{
    VALUE obj = fdbm_alloc(klass);

    if (NIL_P(fdbm_initialize(argc, argv, obj))) {
        return Qnil;
    }

    if (rb_block_given_p()) {
        return rb_ensure(rb_yield, obj, fdbm_close, obj);
    }

    return obj;
}

Public Instance Methods

dbm[key] → string value or nil click to toggle source

Return a value from the database by locating the key string provided. If the key is not found, returns nil.

static VALUE
fdbm_aref(VALUE obj, VALUE keystr)
{
    return fdbm_fetch(obj, keystr, Qnil);
}
dbm[key] = value click to toggle source

Stores the specified string value in the database, indexed via the string key provided.

static VALUE
fdbm_store(VALUE obj, VALUE keystr, VALUE valstr)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;

    fdbm_modify(obj);
    keystr = rb_obj_as_string(keystr);
    valstr = rb_obj_as_string(valstr);

    key.dptr = RSTRING_PTR(keystr);
    key.dsize = RSTRING_DSIZE(keystr);

    val.dptr = RSTRING_PTR(valstr);
    val.dsize = RSTRING_DSIZE(valstr);

    GetDBM2(obj, dbmp, dbm);
    dbmp->di_size = -1;
    if (dbm_store(dbm, key, val, DBM_REPLACE)) {
        dbm_clearerr(dbm);
        if (errno == EPERM) rb_sys_fail(0);
        rb_raise(rb_eDBMError, "dbm_store failed");
    }

    return valstr;
}
Also aliased as: store
clear click to toggle source

Deletes all data from the database.

static VALUE
fdbm_clear(VALUE obj)
{
    datum key;
    struct dbmdata *dbmp;
    DBM *dbm;

    fdbm_modify(obj);
    GetDBM2(obj, dbmp, dbm);
    dbmp->di_size = -1;
    while (key = dbm_firstkey(dbm), key.dptr) {
        if (dbm_delete(dbm, key)) {
            rb_raise(rb_eDBMError, "dbm_delete failed");
        }
    }
    dbmp->di_size = 0;

    return obj;
}
close click to toggle source

Closes the database.

static VALUE
fdbm_close(VALUE obj)
{
    struct dbmdata *dbmp;

    GetDBM(obj, dbmp);
    dbm_close(dbmp->di_dbm);
    dbmp->di_dbm = 0;

    return Qnil;
}
closed? → true or false click to toggle source

Returns true if the database is closed, false otherwise.

static VALUE
fdbm_closed(VALUE obj)
{
    struct dbmdata *dbmp;

    TypedData_Get_Struct(obj, struct dbmdata, &dbm_type, dbmp);
    if (dbmp->di_dbm == 0)
        return Qtrue;

    return Qfalse;
}
delete(key) click to toggle source

Deletes an entry from the database.

static VALUE
fdbm_delete(VALUE obj, VALUE keystr)
{
    datum key, value;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE valstr;
    long len;

    fdbm_modify(obj);
    ExportStringValue(keystr);
    len = RSTRING_LEN(keystr);
    if (TOO_LONG(len)) goto not_found;
    key.dptr = RSTRING_PTR(keystr);
    key.dsize = (DSIZE_TYPE)len;

    GetDBM2(obj, dbmp, dbm);

    value = dbm_fetch(dbm, key);
    if (value.dptr == 0) {
      not_found:
        if (rb_block_given_p()) return rb_yield(keystr);
        return Qnil;
    }

    /* need to save value before dbm_delete() */
    valstr = rb_str_new(value.dptr, value.dsize);

    if (dbm_delete(dbm, key)) {
        dbmp->di_size = -1;
        rb_raise(rb_eDBMError, "dbm_delete failed");
    }
    else if (dbmp->di_size >= 0) {
        dbmp->di_size--;
    }
    return valstr;
}
delete_if {|key, value| block} → self click to toggle source

Deletes all entries for which the code block returns true. Returns self.

static VALUE
fdbm_delete_if(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE keystr, valstr;
    VALUE ret, ary = rb_ary_tmp_new(0);
    int status = 0;
    long i, n;

    fdbm_modify(obj);
    GetDBM2(obj, dbmp, dbm);
    n = dbmp->di_size;
    dbmp->di_size = -1;

    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        keystr = rb_str_new(key.dptr, key.dsize);
        OBJ_FREEZE(keystr);
        valstr = rb_str_new(val.dptr, val.dsize);
        ret = rb_protect(rb_yield, rb_assoc_new(rb_str_dup(keystr), valstr), &status);
        if (status != 0) break;
        if (RTEST(ret)) rb_ary_push(ary, keystr);
        GetDBM2(obj, dbmp, dbm);
    }

    for (i = 0; i < RARRAY_LEN(ary); i++) {
        keystr = RARRAY_AREF(ary, i);
        key.dptr = RSTRING_PTR(keystr);
        key.dsize = (DSIZE_TYPE)RSTRING_LEN(keystr);
        if (dbm_delete(dbm, key)) {
            rb_raise(rb_eDBMError, "dbm_delete failed");
        }
    }
    if (status) rb_jump_tag(status);
    if (n > 0) dbmp->di_size = n - RARRAY_LEN(ary);
    rb_ary_clear(ary);

    return obj;
}
Also aliased as: reject!

Calls the block once for each [key, value] pair in the database. Returns self.

static VALUE
fdbm_each_pair(VALUE obj)
{
    datum key, val;
    DBM *dbm;
    struct dbmdata *dbmp;
    VALUE keystr, valstr;

    RETURN_ENUMERATOR(obj, 0, 0);

    GetDBM2(obj, dbmp, dbm);

    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        keystr = rb_str_new(key.dptr, key.dsize);
        valstr = rb_str_new(val.dptr, val.dsize);
        rb_yield(rb_assoc_new(keystr, valstr));
        GetDBM2(obj, dbmp, dbm);
    }

    return obj;
}
Also aliased as: each_pair
each_key {|key| block} → self click to toggle source

Calls the block once for each key string in the database. Returns self.

static VALUE
fdbm_each_key(VALUE obj)
{
    datum key;
    struct dbmdata *dbmp;
    DBM *dbm;

    RETURN_ENUMERATOR(obj, 0, 0);

    GetDBM2(obj, dbmp, dbm);
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        rb_yield(rb_str_new(key.dptr, key.dsize));
        GetDBM2(obj, dbmp, dbm);
    }
    return obj;
}

Calls the block once for each [key, value] pair in the database. Returns self.

Alias for: each
each_value {|value| block} → self click to toggle source

Calls the block once for each value string in the database. Returns self.

static VALUE
fdbm_each_value(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;

    RETURN_ENUMERATOR(obj, 0, 0);

    GetDBM2(obj, dbmp, dbm);
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        rb_yield(rb_str_new(val.dptr, val.dsize));
        GetDBM2(obj, dbmp, dbm);
    }
    return obj;
}
empty? click to toggle source

Returns true if the database is empty, false otherwise.

static VALUE
fdbm_empty_p(VALUE obj)
{
    datum key;
    struct dbmdata *dbmp;
    DBM *dbm;

    GetDBM2(obj, dbmp, dbm);
    if (dbmp->di_size < 0) {
        dbm = dbmp->di_dbm;

        for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
            return Qfalse;
        }
    }
    else {
        if (dbmp->di_size)
            return Qfalse;
    }
    return Qtrue;
}
fetch(key[, ifnone]) → value click to toggle source

Return a value from the database by locating the key string provided. If the key is not found, returns ifnone. If ifnone is not given, raises IndexError.

static VALUE
fdbm_fetch_m(int argc, VALUE *argv, VALUE obj)
{
    VALUE keystr, valstr, ifnone;

    rb_scan_args(argc, argv, "11", &keystr, &ifnone);
    valstr = fdbm_fetch(obj, keystr, ifnone);
    if (argc == 1 && !rb_block_given_p() && NIL_P(valstr))
        rb_raise(rb_eIndexError, "key not found");

    return valstr;
}
has_key?(key) → boolean

Returns true if the database contains the specified key, false otherwise.

Alias for: include?
has_value?(value) → boolean click to toggle source

Returns true if the database contains the specified string value, false otherwise.

static VALUE
fdbm_has_value(VALUE obj, VALUE valstr)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    long len;

    ExportStringValue(valstr);
    len = RSTRING_LEN(valstr);
    if (TOO_LONG(len)) return Qfalse;
    val.dptr = RSTRING_PTR(valstr);
    val.dsize = (DSIZE_TYPE)len;

    GetDBM2(obj, dbmp, dbm);
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        if ((DSIZE_TYPE)val.dsize == (DSIZE_TYPE)RSTRING_LEN(valstr) &&
            memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0)
            return Qtrue;
    }
    return Qfalse;
}
Also aliased as: value?
include?(key) → boolean click to toggle source

Returns true if the database contains the specified key, false otherwise.

static VALUE
fdbm_has_key(VALUE obj, VALUE keystr)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    long len;

    ExportStringValue(keystr);
    len = RSTRING_LEN(keystr);
    if (TOO_LONG(len)) return Qfalse;
    key.dptr = RSTRING_PTR(keystr);
    key.dsize = (DSIZE_TYPE)len;

    GetDBM2(obj, dbmp, dbm);
    val = dbm_fetch(dbm, key);
    if (val.dptr) return Qtrue;
    return Qfalse;
}
Also aliased as: has_key?, member?, key?
invert → hash click to toggle source

Returns a Hash (not a DBM database) created by using each value in the database as a key, with the corresponding key as its value.

static VALUE
fdbm_invert(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE keystr, valstr;
    VALUE hash = rb_hash_new();

    GetDBM2(obj, dbmp, dbm);
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        keystr = rb_str_new(key.dptr, key.dsize);
        valstr = rb_str_new(val.dptr, val.dsize);
        rb_hash_aset(hash, valstr, keystr);
    }
    return hash;
}
key(value) → string click to toggle source

Returns the key for the specified value.

static VALUE
fdbm_key(VALUE obj, VALUE valstr)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    long len;

    ExportStringValue(valstr);
    len = RSTRING_LEN(valstr);
    if (TOO_LONG(len)) return Qnil;

    GetDBM2(obj, dbmp, dbm);
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        if ((long)val.dsize == RSTRING_LEN(valstr) &&
            memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0) {
            return rb_str_new(key.dptr, key.dsize);
        }
    }
    return Qnil;
}
key?(key) → boolean

Returns true if the database contains the specified key, false otherwise.

Alias for: include?
keys → array click to toggle source

Returns an array of all the string keys in the database.

static VALUE
fdbm_keys(VALUE obj)
{
    datum key;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE ary;

    GetDBM2(obj, dbmp, dbm);

    ary = rb_ary_new();
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        rb_ary_push(ary, rb_str_new(key.dptr, key.dsize));
    }

    return ary;
}
length → integer click to toggle source

Returns the number of entries in the database.

static VALUE
fdbm_length(VALUE obj)
{
    datum key;
    struct dbmdata *dbmp;
    DBM *dbm;
    int i = 0;

    GetDBM2(obj, dbmp, dbm);
    if (dbmp->di_size > 0) return INT2FIX(dbmp->di_size);

    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        i++;
    }
    dbmp->di_size = i;

    return INT2FIX(i);
}
Also aliased as: size
member?(key) → boolean

Returns true if the database contains the specified key, false otherwise.

Alias for: include?
reject {|key,value| block} → Hash click to toggle source

Converts the contents of the database to an in-memory Hash, then calls Hash#reject with the specified code block, returning a new Hash.

static VALUE
fdbm_reject(VALUE obj)
{
    return rb_hash_delete_if(fdbm_to_hash(obj));
}
reject! {|key, value| block} → self

Deletes all entries for which the code block returns true. Returns self.

Alias for: delete_if
replace(obj) click to toggle source

Replaces the contents of the database with the contents of the specified object. Takes any object which implements the each_pair method, including Hash and DBM objects.

static VALUE
fdbm_replace(VALUE obj, VALUE other)
{
    fdbm_clear(obj);
    rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj);
    return obj;
}
select {|key, value| block} → array click to toggle source

Returns a new array consisting of the [key, value] pairs for which the code block returns true.

static VALUE
fdbm_select(VALUE obj)
{
    VALUE new = rb_ary_new();
    datum key, val;
    DBM *dbm;
    struct dbmdata *dbmp;

    GetDBM2(obj, dbmp, dbm);
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        VALUE assoc, v;
        val = dbm_fetch(dbm, key);
        assoc = rb_assoc_new(rb_str_new(key.dptr, key.dsize),
                             rb_str_new(val.dptr, val.dsize));
        v = rb_yield(assoc);
        if (RTEST(v)) {
            rb_ary_push(new, assoc);
        }
        GetDBM2(obj, dbmp, dbm);
    }

    return new;
}
shift() → [key, value] click to toggle source

Removes a [key, value] pair from the database, and returns it. If the database is empty, returns nil. The order in which values are removed/returned is not guaranteed.

static VALUE
fdbm_shift(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE keystr, valstr;

    fdbm_modify(obj);
    GetDBM2(obj, dbmp, dbm);
    dbmp->di_size = -1;

    key = dbm_firstkey(dbm);
    if (!key.dptr) return Qnil;
    val = dbm_fetch(dbm, key);
    keystr = rb_str_new(key.dptr, key.dsize);
    valstr = rb_str_new(val.dptr, val.dsize);
    dbm_delete(dbm, key);

    return rb_assoc_new(keystr, valstr);
}
size → integer

Returns the number of entries in the database.

Alias for: length
store(key, value) → value

Stores the specified string value in the database, indexed via the string key provided.

Alias for: []=
to_a → array click to toggle source

Converts the contents of the database to an array of [key, value] arrays, and returns it.

static VALUE
fdbm_to_a(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE ary;

    GetDBM2(obj, dbmp, dbm);
    ary = rb_ary_new();
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        rb_ary_push(ary, rb_assoc_new(rb_str_new(key.dptr, key.dsize),
                                      rb_str_new(val.dptr, val.dsize)));
    }

    return ary;
}
to_hash → hash click to toggle source

Converts the contents of the database to an in-memory Hash object, and returns it.

static VALUE
fdbm_to_hash(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE hash;

    GetDBM2(obj, dbmp, dbm);
    hash = rb_hash_new();
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        rb_hash_aset(hash, rb_str_new(key.dptr, key.dsize),
                           rb_str_new(val.dptr, val.dsize));
    }

    return hash;
}
update(obj) click to toggle source

Updates the database with multiple values from the specified object. Takes any object which implements the each_pair method, including Hash and DBM objects.

static VALUE
fdbm_update(VALUE obj, VALUE other)
{
    rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj);
    return obj;
}
value?(value) → boolean

Returns true if the database contains the specified string value, false otherwise.

Alias for: has_value?
values → array click to toggle source

Returns an array of all the string values in the database.

static VALUE
fdbm_values(VALUE obj)
{
    datum key, val;
    struct dbmdata *dbmp;
    DBM *dbm;
    VALUE ary;

    GetDBM2(obj, dbmp, dbm);
    ary = rb_ary_new();
    for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) {
        val = dbm_fetch(dbm, key);
        rb_ary_push(ary, rb_str_new(val.dptr, val.dsize));
    }

    return ary;
}
values_at(key, ...) → Array click to toggle source

Returns an array containing the values associated with the given keys.

static VALUE
fdbm_values_at(int argc, VALUE *argv, VALUE obj)
{
    VALUE new = rb_ary_new2(argc);
    int i;

    for (i=0; i<argc; i++) {
        rb_ary_push(new, fdbm_fetch(obj, argv[i], Qnil));
    }

    return new;
}