#47729 Directory Server crashes if shutdown during a replication initialization
Closed: wontfix None Opened 10 years ago by mreynolds.

Ticket was cloned from Red Hat Bugzilla (product Red Hat Enterprise Linux 6): Bug 1070583

Description of problem:

rhds91 crash with 389-ds-base-1.2.11.15-31.el6_5.x86_64

scenario may be:
4x MMR with SSL

9 backends
use ldclt to add 10.000 entries to all 9 backends (9x ldclt to one master)
use ldclt to delete all (9x ldclt to same master)
repeat about 10 to 15 times

data is similar to this
cn=rhds00002,ou=people,dc=dc1,dc=example,dc=com

export rmax=10000
export filter=xxxxx
export testname=test1
ldclt -b ou=people,dc=dc1,dc=example,dc=com -D "cn=directory manager" -w
password -h $master -e add,incr,noloop,person,commoncounter,referral=off -r 1
-R $rmax -f "cn=rhds$filter" -v > $testname.txt &



Version-Release number of selected component (if applicable):

389-ds-base-1.2.11.15-31.el6_5.x86_64
redhat-ds-base-9.1.0-1.el6dsrv.x86_64


How reproducible:

unknown

Steps to Reproduce:
1. not exactly sure yet
2.
3.

Actual results:

Core was generated by `/usr/sbin/ns-slapd -D
/etc/dirsrv/slapd-pool-ldapbsa-provexpresso-01 -i /var/ru'.
Program terminated with signal 11, Segmentation fault.
#0  0x00007ff774fb58eb in __dbc_get_pp (dbc=0x1c2e0d0, key=0x7ff6a0df33b0,
data=0x7ff6a0df3380, flags=26) at ../../db/db_iface.c:2091
2091            ENV_ENTER(env, ip);

(gdb) where
#0  0x00007ff774fb58eb in __dbc_get_pp (dbc=0x1c2e0d0, key=0x7ff6a0df33b0,
data=0x7ff6a0df3380, flags=26) at ../../db/db_iface.c:2091
#1  0x00007ff7731c5c21 in entryrdn_lookup_dn (be=0x1c0dd50, rdn=<value
optimized out>, id=<value optimized out>, dn=0x7ff6a0df34e8, txn=<value
optimized out>) at ldap/servers/slapd/back-ldbm/ldbm_entryrdn.c:1241
#2  0x00007ff7731a231d in id2entry (be=0x1c0dd50, id=115270,
txn=0x7ff6a0df35c0, err=<value optimized out>) at
ldap/servers/slapd/back-ldbm/id2entry.c:378
#3  0x00007ff7731d5602 in ldbm_back_next_search_entry_ext (pb=0x7ff6800b23c0,
use_extension=0) at ldap/servers/slapd/back-ldbm/ldbm_search.c:1487
#4  0x0000003d90086251 in iterate (pb=0x7ff6800b23c0, be=<value optimized out>,
pnentries=0x7ff6a0df77b4, pagesize=-1, pr_statp=0x7ff6a0df77a8, send_result=1)
at ldap/servers/slapd/opshared.c:1281
#5  0x0000003d90086777 in send_results_ext (pb=0x7ff6800b23c0,
nentries=0x7ff6a0df77b4, pagesize=-1, pr_stat=0x7ff6a0df77a8, send_result=1) at
ldap/servers/slapd/opshared.c:1682
#6  0x0000003d900874cd in op_shared_search (pb=0x7ff6800b23c0, send_result=1)
at ldap/servers/slapd/opshared.c:853
#7  0x0000003d90093a1c in search_internal_callback_pb (pb=0x7ff6800b23c0,
callback_data=<value optimized out>, prc=0x7ff772f32460 <get_result>,
psec=0x7ff772f32d60 <send_entry>, prec=0) at
ldap/servers/slapd/plugin_internal_op.c:812
#8  0x00007ff772f32af4 in repl5_tot_run (prp=0x2955870) at
ldap/servers/plugins/replication/repl5_tot_protocol.c:446
#9  0x00007ff772f20f9a in prot_thread_main (arg=0x2954c40) at
ldap/servers/plugins/replication/repl5_protocol.c:355
#10 0x0000003bc8e299a6 in _pt_root (arg=0x2955a20) at
../../../nspr/pr/src/pthreads/ptthread.c:204
#11 0x0000003c9a0079d1 in start_thread (arg=0x7ff6a0dfa700) at
pthread_create.c:301
#12 0x00000038f22e8b6d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115
(gdb)


   ?2067     * __dbc_get_pp --
   ?2068     *      DBC->get pre/post processing.
   ?2069     *
   ?2070     * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t));
   ?2071     */
   ?2072    int
   ?2073    __dbc_get_pp(dbc, key, data, flags)
   ?2074            DBC *dbc;
   ?2075            DBT *key, *data;
   ?2076            u_int32_t flags;
   ?2077    {
   ?2078            DB *dbp;
   ?2079            DB_THREAD_INFO *ip;
   ?2080            ENV *env;
   ?2081            int ignore_lease, ret;
   ?2082
   ?2083            dbp = dbc->dbp;
   ?2084            env = dbp->env;
   ?2085
   ?2086            ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
   ?2087            LF_CLR(DB_IGNORE_LEASE);
   ?2088            if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0)
   ?2089                    return (ret);
   ?2090
 +>?2091            ENV_ENTER(env, ip);
   ?2092
   ?2093            DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get",
   ?2094                flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
NULL, flags);
   ?2095            ret = __dbc_get(dbc, key, data, flags);
   ?2096
   ?2097            /*
   ?2098             * Check for master leases.
   ?2099             */
   ?2100            if (ret == 0 &&
   ?2101                IS_REP_MASTER(env) && IS_USING_LEASES(env) &&
!ignore_lease)
   ?2102                    ret = __rep_lease_check(env, 1);
   ?2103
   ?2104            ENV_LEAVE(env, ip);
   ?2105            __dbt_userfree(env, key, NULL, data);
   ?2106            return (ret);
   ?2107    }


(gdb)
(gdb) p ip
$1 = <value optimized out>
(gdb) p env
$2 = (ENV *) 0x0
(gdb)
(gdb) p dbp
$3 = <value optimized out>

(gdb) p *dbc
$5 = {dbp = 0x1c2cc60, dbenv = 0x1db93a0, env = 0x1f15f40, thread_info = 0x0,
txn = 0x0, priority = DB_PRIORITY_UNCHANGED, links = {tqe_next = 0x0, tqe_prev
= 0x1c6a500}, rskey = 0x1c2e128, rkey = 0x1c2e150, rdata = 0x1c2e178,
  my_rskey = {data = 0x0, size = 0, ulen = 0, dlen = 0, doff = 0, app_data =
0x0, flags = 0}, my_rkey = {data = 0x0, size = 0, ulen = 0, dlen = 0, doff = 0,
app_data = 0x0, flags = 0}, my_rdata = {data = 0x0, size = 0, ulen = 0,
    dlen = 0, doff = 0, app_data = 0x0, flags = 0}, lref = 0x7ff75d095bd8,
locker = 0x7ff75d095bd8, lock_dbt = {data = 0x1c2e1d8, size = 28, ulen = 0,
dlen = 0, doff = 0, app_data = 0x0, flags = 0}, lock = {pgno = 35,
    fileid =
"\336\367\003\000\001\375\000\000\206g\332\006\232\250\001\000\000\000\000",
type = 3}, mylock = {off = 0, ndx = 0, gen = 0, mode = DB_LOCK_NG}, cl_id = 0,
dbtype = DB_BTREE, internal = 0x1c2cc70,
  close = 0x7ff774fb5d50 <__dbc_close_pp>, count = 0x7ff774fb5c60
<__dbc_count_pp>, del = 0x7ff774fb5ac0 <__dbc_del_pp>, dup = 0x7ff774fb59e0
<__dbc_dup_pp>, get = 0x7ff774fb5870 <__dbc_get_pp>,
  get_priority = 0x7ff774fa02c0 <__dbc_get_priority>, pget = 0x7ff774fb5410
<__dbc_pget_pp>, put = 0x7ff774fb4d10 <__dbc_put_pp>, set_priority =
0x7ff774fa02b0 <__dbc_set_priority>, c_close = 0x7ff774fb5d50 <__dbc_close_pp>,
  c_count = 0x7ff774fb5c60 <__dbc_count_pp>, c_del = 0x7ff774fb5ac0
<__dbc_del_pp>, c_dup = 0x7ff774fb59e0 <__dbc_dup_pp>, c_get = 0x7ff774fb5870
<__dbc_get_pp>, c_pget = 0x7ff774fb5410 <__dbc_pget_pp>,
  c_put = 0x7ff774fb4d10 <__dbc_put_pp>, am_bulk = 0x7ff774f054c0 <__bam_bulk>,
am_close = 0x7ff774f06db0 <__bamc_close>, am_del = 0x7ff774f04990 <__bamc_del>,
am_destroy = 0x7ff774f04790 <__bamc_destroy>,
  am_get = 0x7ff774f03fb0 <__bamc_get>, am_put = 0x7ff774f06030 <__bamc_put>,
am_writelock = 0x7ff774f03060 <__bamc_writelock>, flags = 64}
(gdb)

see the env = 0x0

(gdb)  p *dbc->dbp
$6 = {pgsize = 0, priority = DB_PRIORITY_UNCHANGED, db_append_recno = 0x171,
db_feedback = 0x1c6cb70, dup_compare = 0x1c2e0c0, app_private = 0x1ded00000001,
dbenv = 0x10, env = 0x0, type = 10848, mpf = 0x1, mutex = 0,
  fname = 0x1c2ccc8 "\240??m\367\177", dname = 0x1c2ccc8 "\240?m\367\177",
open_flags = 29543864, fileid =
"\000\000\000\000\240?m\367\177\000\000\020\000\066\001\000\000\000",
adj_fileid = 3173888, log_filename = 0x15c7b00002a60,
  meta_pgno = 1, locker = 0x1, cur_locker = 0x0, cur_txn = 0x0,
associate_locker = 0x0, handle_lock = {off = 0, ndx = 0, gen = 0, mode =
DB_LOCK_NG}, cl_id = 0, timestamp = 0, fid_gen = 0, my_rskey = {data = 0x0,
size = 0, ulen = 0,
    dlen = 0, doff = 0, app_data = 0x0, flags = 0}, my_rkey = {data = 0x0, size
= 0, ulen = 0, dlen = 0, doff = 0, app_data = 0x0, flags = 0}, my_rdata = {data
= 0x0, size = 0, ulen = 0, dlen = 0, doff = 0, app_data = 0x0, flags = 0},
  saved_open_fhp = 0x7ef, dblistlinks = {tqe_next = 0x0, tqe_prev = 0x1c6aab0},
free_queue = {tqh_first = 0x170, tqh_last = 0x50}, active_queue = {tqh_first =
0x100000008, tqh_last = 0x1dbc310}, join_queue = {tqh_first = 0x0,
    tqh_last = 0x1db4c00}, s_secondaries = {lh_first = 0x1c8f800}, s_links =
{le_next = 0x7ff7731cd600, le_prev = 0x1c05ed0}, s_refcnt = 29889472,
s_callback = 0x6769666e6f, s_primary = 0xb1, s_assoc_flags = 29544160,
f_primaries = {
    lh_first = 0x2}, s_foreign = 0x0, api_internal = 0x0, bt_internal = 0x0,
h_internal = 0x1c2cf00, q_internal = 0x0, xa_internal = 0x0, associate = 0,
associate_foreign = 0, close = 0x1c2c0a0, compact = 0, cursor = 0x5,
  del = 0x1800bd0, err = 0, errx = 0, exists = 0, fd = 0, get = 0,
get_bt_minkey = 0, get_byteswapped = 0, get_cachesize = 0x21, get_dbname =
0x657571696e55746e, get_encrypt_flags = 0x38f2006449, get_env = 0x6700353239,
  get_errcall = 0x21, get_errfile = 0x186a0, get_errpfx = 0, get_flags = 0,
get_h_ffactor = 0x31, get_h_nelem = 0x1c2ce30, get_lorder = 0x38f258fe00
<_IO_stdfile_0_lock>, get_mpf = 0x1c93580, get_msgfile = 0x1c2cfb0,
  get_multiple = 0x6769666e6f, get_open_flags = 0x21, get_pagesize = 0x1c8b860,
get_priority = 0x62642e7972746e65, get_q_extentsize = 0x34, get_re_delim =
0x41, get_re_len = 0x72726573753d6e63, get_re_pad = 0x6c3d6e632c746f6f,
  get_re_source = 0x61746164206d6264, get_transactional = 0x3d6e632c65736162,
get_type = 0x2c736e6967756c70, join = 0x69666e6f633d6e63, key_range = 0x67,
open = 0x31, pget = 0x1c2d000, put = 0x7373616c00, remove = 0, rename = 0,
  set_alloc = 0x1de94d0, set_append_recno = 0x21, set_bt_compare =
0x6f4472657355746e, set_bt_minkey = 0x64496e69616d, set_bt_prefix = 0x1c2d120,
set_cachesize = 0xb1, set_dup_compare = 0x1c2d0b0, set_encrypt = 0x2,
set_errcall = 0,
  set_errfile = 0, set_errpfx = 0, set_feedback = 0x1c2d0d0, set_flags = 0,
set_h_compare = 0, set_h_ffactor = 0, set_h_hash = 0, set_h_nelem = 0x1c2cfe0,
set_lorder = 0, set_msgcall = 0x5, set_msgfile = 0x1800bd0, set_pagesize = 0,
  set_paniccall = 0, set_priority = 0, set_q_extentsize = 0, set_re_delim = 0,
set_re_len = 0, set_re_pad = 0, set_re_source = 0x21, stat =
0x6f4472657355746e, stat_print = 0x64496e69616d, sync = 0x1c72020, truncate =
0x21,
  upgrade = 0x186a0, verify = 0, dump = 0, db_am_remove = 0x21, db_am_rename =
0x637463656a626f28, stored_get = 0x292a3d7373616c, stored_close = 0, am_ok =
33, preserve_fid = 0, orig_flags = 1784835880, flags = 1668572005}
(gdb)


Expected results:


Additional info:

git merge ticket47729
Updating 82b12c9..e7f7e91
Fast-forward
.../plugins/replication/repl5_tot_protocol.c | 6 +-----
1 files changed, 1 insertions(+), 5 deletions(-)

git push origin master
82b12c9..e7f7e91 master -> master

commit e7f7e91
Author: Mark Reynolds mreynolds@redhat.com
Date: Fri Mar 7 14:07:01 2014 -0500

1.3.2

5825292..c40e1cd 389-ds-base-1.3.2 -> 389-ds-base-1.3.2

1.3.1

86fbd5a..de8d28b 389-ds-base-1.3.1 -> 389-ds-base-1.3.1

1.3.0

58e5152..9ac3edb 389-ds-base-1.3.0 -> 389-ds-base-1.3.0

1.2.11

6a24433..e0fd1ff 389-ds-base-1.2.11 -> 389-ds-base-1.2.11

Metadata Update from @mreynolds:
- Issue assigned to mreynolds
- Issue set to the milestone: 1.2.11.26

7 years ago

389-ds-base is moving from Pagure to Github. This means that new issues and pull requests
will be accepted only in 389-ds-base's github repository.

This issue has been cloned to Github and is available here:
- https://github.com/389ds/389-ds-base/issues/1062

If you want to receive further updates on the issue, please navigate to the github issue
and click on subscribe button.

Thank you for understanding. We apologize for all inconvenience.

Metadata Update from @spichugi:
- Issue close_status updated to: wontfix (was: Fixed)

3 years ago

Login to comment on this ticket.

Metadata