当我从 Erlang shell(eshell)启动 supervisor 时,它崩溃了?

10

我对OTP非常陌生,我正在尝试创建一个简单的示例来理解监督器行为:

这里是一个简单的增量服务器。

-module( inc_serv ).
-behaviour( gen_server ).
-export( [ start/0, inc/1, stop/0 ] ).
-export( [ init/1, handle_call/3, terminate/2 ] ).

%% @doc Start the increment server as a gen_server linked to the calling
%% process and registered locally under this module's name.
%% The atom `no_args' is passed to init/1; no gen_server options are set.
%% Returns whatever gen_server:start_link/4 returns.
start() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, no_args, []).

%% @doc Synchronously send the `stop' request to the locally registered
%% server and return its reply.
stop() ->
    Request = stop,
    gen_server:call(?MODULE, Request).

%% @doc Synchronously ask the locally registered server to increment `Num'
%% and return the server's reply.
%% The request is sent as the tagged tuple `{num, Num}'.
inc(Num) ->
    Request = {num, Num},
    gen_server:call(?MODULE, Request).

%% @doc gen_server callback: print a start-up banner and begin with the
%% placeholder state `no_state'. Only the argument `no_args' is accepted.
init(no_args) ->
    Banner = "Increment server started :)",
    %% ~p prints the string as an Erlang term, i.e. including the quotes.
    io:format("~p~n", [Banner]),
    {ok, no_state}.

%% @doc gen_server callback for synchronous requests.
%%
%% `{num, Num}' replies with `Num + 1' and keeps the state unchanged.
%% `stop' replies `ok' and stops the server with reason `normal'.
%%
%% Both clauses require the state to be `no_state'. Any other request or
%% state is deliberately left unmatched.
handle_call({num, Num}, _From, no_state = State) ->
    Incremented = Num + 1,
    {reply, Incremented, State};
handle_call(stop, _From, no_state = State) ->
    {stop, normal, ok, State}.

%% @doc gen_server callback: print a shutdown banner when the server stops.
%% The termination reason is not inspected, so it is bound to `_Reason' to
%% avoid an unused-variable compiler warning.
%% Returns the result of io:format/2.
terminate(_Reason, no_state) ->
    %% ~p prints the string as an Erlang term, i.e. including the quotes.
    io:format("~p~n", ["Increment server stopped"]).

我希望用这个模块来监督它:

-module( supervisor_inc ).
-behaviour( supervisor ).

-export( [ start/0 ] ).
-export( [ init/1 ] ).

%% @doc Start the supervisor linked to the calling process and registered
%% locally under this module's name. The atom `no_args' is passed to init/1.
%% Returns whatever supervisor:start_link/3 returns.
start() ->
    RegisteredName = {local, ?MODULE},
    supervisor:start_link(RegisteredName, ?MODULE, no_args).

%% @doc supervisor callback: describe the restart strategy and the single
%% supervised child.
%%
%% Strategy: `one_for_one', allowing at most 1 restart within 1 second.
%% Child: `inc_serv', started through inc_serv:start/0, restarted
%% permanently, given 2000 ms to shut down, of type `worker'.
%%
%% No explicit process_flag(trap_exit, true) is needed here: the supervisor
%% behaviour already traps exits before it invokes this callback.
init(no_args) ->
    SupFlags = {one_for_one, 1, 1},
    ChildSpec =
        {inc_serv,
         {inc_serv, start, []},
         permanent,
         2000,
         worker,
         [inc_serv]},
    {ok, {SupFlags, [ChildSpec]}}.

之后我在Erlang shell中执行了以下步骤:

1> 
1> c(inc_serv).
{ok,inc_serv}
2> 
2> c(supervisor_inc).
{ok,supervisor_inc}
3> 
3> supervisor_inc:start().
"Increment server started :)"
{ok,<0.43.0>}
4> 
4> inc_serv:inc( 7 ).
8
5> inc_serv:inc( 8 ).
9

接下来我尝试了下面的方法(如预期的一样,我遇到了错误):

6> inc_serv:inc( bad_arg ).
"Increment server stopped"
"Increment server started :)"

=ERROR REPORT==== 23-Aug-2012::19:32:06 ===
** Generic server inc_serv terminating 
** Last message in was {num,bad_arg}
** When Server state == no_state
** Reason for termination == 
** {badarith,[{inc_serv,handle_call,3,[{file,"inc_serv.erl"},{line,22}]},
              {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,588}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,227}]}]}

=ERROR REPORT==== 23-Aug-2012::19:32:06 ===
** Generic server supervisor_inc terminating 
** Last message in was {'EXIT',<0.31.0>,
                           {{{badarith,
                                 [{inc_serv,handle_call,3,
                                      [{file,"inc_serv.erl"},{line,22}]},
                                  {gen_server,handle_msg,5,
                                      [{file,"gen_server.erl"},{line,588}]},
                                  {proc_lib,init_p_do_apply,3,
                                      [{file,"proc_lib.erl"},{line,227}]}]},
                             {gen_server,call,[inc_serv,{num,bad_arg}]}},
                            [{gen_server,call,2,
                                 [{file,"gen_server.erl"},{line,180}]},
                             {erl_eval,do_apply,6,
                                 [{file,"erl_eval.erl"},{line,576}]},
                             {shell,exprs,7,[{file,"shell.erl"},{line,668}]},
                             {shell,eval_exprs,7,
                                 [{file,"shell.erl"},{line,623}]},
                             {shell,eval_loop,3,
                                 [{file,"shell.erl"},{line,608}]}]}}
** When Server state == {state,
                            {local,supervisor_inc},
                            one_for_one,
                            [{child,<0.48.0>,inc_serv,
                                 {inc_serv,start,[]},
                                 permanent,2000,worker,
                                 [inc_serv]}],
                            undefined,1,1,
                            [{1345,739526,107495}],
                            supervisor_inc,no_args}
** Reason for termination == 
** {{{badarith,[{inc_serv,handle_call,3,[{file,"inc_serv.erl"},{line,22}]},
                {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,588}]},
                {proc_lib,init_p_do_apply,3,
                          [{file,"proc_lib.erl"},{line,227}]}]},
     {gen_server,call,[inc_serv,{num,bad_arg}]}},
    [{gen_server,call,2,[{file,"gen_server.erl"},{line,180}]},
     {erl_eval,do_apply,6,[{file,"erl_eval.erl"},{line,576}]},
     {shell,exprs,7,[{file,"shell.erl"},{line,668}]},
     {shell,eval_exprs,7,[{file,"shell.erl"},{line,623}]},
     {shell,eval_loop,3,[{file,"shell.erl"},{line,608}]}]}
** exception exit: {{badarith,[{inc_serv,handle_call,3,
                                         [{file,"inc_serv.erl"},{line,22}]},
                               {gen_server,handle_msg,5,
                                           [{file,"gen_server.erl"},{line,588}]},
                               {proc_lib,init_p_do_apply,3,
                                         [{file,"proc_lib.erl"},{line,227}]}]},
                    {gen_server,call,[inc_serv,{num,bad_arg}]}}
     in function  gen_server:call/2 (gen_server.erl, line 180)

之后我期望我的 supervisor 会重新启动 inc_serv,但它并没有这样做:

7> inc_serv:inc( 8 ).      
** exception exit: {noproc,{gen_server,call,[inc_serv,{num,8}]}}
     in function  gen_server:call/2 (gen_server.erl, line 180)

你能帮我理解发生了什么吗?另外,我应该如何重写我的 supervisor,使它能够重新启动 inc_serv?

谢谢

1个回答

阿里云服务器只需要99元/年,新老用户同享,点击查看详情
23

这实际上是一种竞争条件。

您可能知道,Erlang shell 本身就是一个普通的 Erlang 进程。当您从 shell 启动 supervisor 时,supervisor 会与 shell 进程建立链接(link),因为您使用了 supervisor:start_link/3。

当您调用 gen_server 进程时,该进程会崩溃(并且如您在随后的"Increment server started :)"输出中看到的那样,由 supervisor 正确地重新启动)。

然而,与此同时,您对 gen_server:call/2 的调用也会以同样的原因失败(如果 gen_server 在处理调用期间崩溃,gen_server:call/2 会让调用方进程以相同的原因退出)。于是,与您的 supervisor 相链接的 shell 进程崩溃,进而导致 supervisor 以相同的原因(badarith)再次崩溃。

基本上,您的 supervisor 在忠实地重新启动 gen_server 后被您的 shell 进程背叛。就像这样:

       +---------(6)exit----------+    +---------(5)restart---------+
       |                          |    |                            |
       |                          v    |                            v
     Shell ---(1)start_link---> supervisor ---(2)start_link---> gen_server
     |  ^                         ^    |                         ^  |   ^
     |  |                         |    |                         |  |   |
     |  |                         |    +---------(7)exit---------+  |   |
     |  |                         |                                 |   |
     |  +-------------------------+--------------(4)exit------------+   |
     |                                                                  |
     +---------------------------(3)call--------------------------------+

你可以通过在shell中调用 catch inc_serv:inc(bad_arg) 来避免这种情况:

90> inc_serv:inc(7).        
8
91> catch inc_serv:inc(bad_arg).
"Increment server stopped"

=ERROR REPORT==== 23-Aug-2012::22:10:02 ===
** Generic server inc_serv terminating 
** Last message in was {num,bad_arg}
** When Server state == no_state
** Reason for termination == 
** {badarith,[{inc_serv,handle_call,3,[{file,"inc_serv.erl"},{line,20}]},
              {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,588}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,227}]}]}
"Increment server started :)"
{'EXIT',{{badarith,[{inc_serv,handle_call,3,
                              [{file,"inc_serv.erl"},{line,20}]},
                    {gen_server,handle_msg,5,
                              [{file,"gen_server.erl"},{line,588}]},
                    {proc_lib,init_p_do_apply,3,
                              [{file,"proc_lib.erl"},{line,227}]}]},
                    {gen_server,call,[inc_serv,{num,bad_arg}]}}}
92> inc_serv:inc(7).            
8

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,