<!-- 
RSS generated by JIRA (8.20.10#820010-sha1:ace47f9899e9ee25d7157d59aa17ab06aee30d3d) at Wed Feb 07 19:56:21 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>OpenDaylight JIRA</title>
    <link>https://jira.opendaylight.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>8.20.10</version>
        <build-number>820010</build-number>
        <build-date>22-06-2022</build-date>
    </build-info>


<item>
            <title>[CONTROLLER-1751] Sporadic cluster failure when member is restarted in OF cluster test</title>
                <link>https://jira.opendaylight.org/browse/CONTROLLER-1751</link>
                <project id="10113" key="CONTROLLER">controller</project>
                    <description>&lt;p&gt;Please see attached karaf log. We see this behavior sporadically at different places in the OpenFlow cluster test after we kill an instance and bring it back. Please help identifying the root cause.&lt;/p&gt;</description>
                <environment>&lt;p&gt;Operating System: All&lt;br/&gt;
Platform: All&lt;/p&gt;</environment>
        <key id="26305">CONTROLLER-1751</key>
            <summary>Sporadic cluster failure when member is restarted in OF cluster test</summary>
                <type id="10104" iconUrl="https://jira.opendaylight.org/secure/viewavatar?size=xsmall&amp;avatarId=10303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.opendaylight.org/images/icons/priorities/blocker.svg">Highest</priority>
                        <status id="5" iconUrl="https://jira.opendaylight.org/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10000">Done</resolution>
                                        <assignee username="ecelgp">Luis Gomez</assignee>
                                    <reporter username="ecelgp">Luis Gomez</reporter>
                        <labels>
                    </labels>
                <created>Thu, 17 Aug 2017 17:47:53 +0000</created>
                <updated>Tue, 25 Jul 2023 08:24:46 +0000</updated>
                            <resolved>Thu, 24 May 2018 02:03:38 +0000</resolved>
                                                                    <component>clustering</component>
                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                                                                <comments>
                            <comment id="52587" author="ecelgp" created="Thu, 17 Aug 2017 17:51:43 +0000"  >&lt;p&gt;Attachment karaf_member_restart.txt has been added with description: karaf log for restarted member&lt;/p&gt;</comment>
                            <comment id="52552" author="ecelgp" created="Thu, 17 Aug 2017 17:55:18 +0000"  >&lt;p&gt;WARN messages in attached karaf log:&lt;/p&gt;

&lt;p&gt;2017-08-15 14:29:10,004 | WARN  | saction-28-30&apos;}} | DeadlockMonitor                  | 119 - org.opendaylight.controller.config-manager - 0.6.2.Carbon | ModuleIdentifier&lt;/p&gt;
{factoryName=&apos;runtime-generated-mapping&apos;, instanceName=&apos;runtime-mapping-singleton&apos;} did not finish after 274970 ms&lt;br/&gt;
2017-08-15 14:29:15,004 | WARN  | saction-28-30&apos;}} | DeadlockMonitor                  | 119 - org.opendaylight.controller.config-manager - 0.6.2.Carbon | ModuleIdentifier{factoryName=&apos;runtime-generated-mapping&apos;, instanceName=&apos;runtime-mapping-singleton&apos;}
&lt;p&gt; did not finish after 279970 ms&lt;/p&gt;</comment>
                            <comment id="52553" author="vrpolak" created="Mon, 21 Aug 2017 12:44:41 +0000"  >&lt;p&gt;A place in Robot test showing member-2 not syncing within 5 minutes: &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-carbon/733/log.html.gz#s1-s1-t34-k2-k2-k8-k1-k2-k1-k1-k6-k1&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-carbon/733/log.html.gz#s1-s1-t34-k2-k2-k8-k1-k2-k1-k1-k6-k1&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52554" author="vrpolak" created="Mon, 21 Aug 2017 13:19:00 +0000"  >&lt;p&gt;This is also happening in Netconf &lt;span class=&quot;error&quot;&gt;&amp;#91;1&amp;#93;&lt;/span&gt;, and similar symptom is occasionally seen in Carbon &lt;span class=&quot;error&quot;&gt;&amp;#91;2&amp;#93;&lt;/span&gt;.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/netconf-csit-3node-clustering-only-nitrogen/116/log.html.gz#s1-s7-t19-k2-k2-k8-k1-k2-k1-k1-k6-k1&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/netconf-csit-3node-clustering-only-nitrogen/116/log.html.gz#s1-s7-t19-k2-k2-k8-k1-k2-k1-k1-k6-k1&lt;/a&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;2&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://bugs.opendaylight.org/show_bug.cgi?id=8999&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugs.opendaylight.org/show_bug.cgi?id=8999&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52555" author="shague@redhat.com" created="Thu, 24 Aug 2017 12:40:42 +0000"  >&lt;p&gt;Issue also seen in Netvirt csit: &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/netvirt-csit-3node-openstack-ocata-gate-stateful-carbon/12/log.html.gz#s1-s2-s2-t7&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/netvirt-csit-3node-openstack-ocata-gate-stateful-carbon/12/log.html.gz#s1-s2-s2-t7&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52556" author="tpantelis" created="Thu, 24 Aug 2017 12:55:52 +0000"  >&lt;p&gt;All the shards timed out and went to Candidate and stayed there repeatedly  starting new elections. This means it could not connect to any of the other nodes. Since this is intermittent, it seems like an issue in the jenkins environment. &lt;/p&gt;

&lt;p&gt;I think the CSS timeout is a consequence of this as we block CDS bundle startup while waiting for shards to obtain leaders.&lt;/p&gt;</comment>
                            <comment id="52557" author="tpantelis" created="Thu, 24 Aug 2017 14:15:54 +0000"  >&lt;p&gt;It looks like it couldn&apos;t contact any other seed node in time so joined itself and made itself leader. Since I assume ODL1 is the first seed node, it can do that. This wouldn&apos;t occur with the other 2 nodes.&lt;/p&gt;

&lt;p&gt;2017-08-15 14:24:55,472 | INFO  | ult-dispatcher-2 | kka://opendaylight-cluster-data) | 171 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.103:2550&amp;#93;&lt;/span&gt; - Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.103:2550&amp;#93;&lt;/span&gt; is JOINING, roles &lt;span class=&quot;error&quot;&gt;&amp;#91;member-1&amp;#93;&lt;/span&gt;&lt;br/&gt;
2017-08-15 14:24:55,480 | INFO  | ult-dispatcher-2 | kka://opendaylight-cluster-data) | 171 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.103:2550&amp;#93;&lt;/span&gt; - Leader is moving node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.103:2550&amp;#93;&lt;/span&gt; to &lt;span class=&quot;error&quot;&gt;&amp;#91;Up&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="52558" author="ecelgp" created="Thu, 24 Aug 2017 16:54:43 +0000"  >&lt;p&gt;Tom, this is weird because all we do in these tests is to kill the java process running the karaf container and after that start karaf again. So if there was an issue in the network it should be there all the time not just at the moment of the test.&lt;/p&gt;</comment>
                            <comment id="52559" author="tpantelis" created="Thu, 24 Aug 2017 17:25:03 +0000"  >&lt;p&gt;yeah it seems weird. I&apos;m just noting what I saw in the log - ODL1 joined itself which means it didn&apos;t connect to another node within a period of time. I don&apos;t know why. We don&apos;t have the logs from the other nodes. Perhaps something in those logs might yield something, eg maybe they refused to let ODL1 join for some reason. Or maybe the seed-node-timeout needs to be increased.&lt;/p&gt;</comment>
                            <comment id="52560" author="ecelgp" created="Thu, 24 Aug 2017 17:48:08 +0000"  >&lt;p&gt;Looking at these recent logs:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/118/odl1_karaf.log.gz&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/118/odl1_karaf.log.gz&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/118/odl2_karaf.log.gz&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/118/odl2_karaf.log.gz&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/118/odl3_karaf.log.gz&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/118/odl3_karaf.log.gz&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The restarted member 2 starts listening to 2550 at 14:27:58:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:27:58,670 | INFO  | ult-dispatcher-3 | Remoting                         | 41 - com.typesafe.akka.slf4j - 2.4.18 | Remoting started; listening on addresses :&lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.14.87:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;The other member 1 sends connection attempts every 5 secs:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:27:53,898 | WARN  | lt-dispatcher-48 | ReliableDeliverySupervisor       | 41 - com.typesafe.akka.slf4j - 2.4.18 | Association with remote system &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.14.87:2550&amp;#93;&lt;/span&gt; has failed, address is now gated for &lt;span class=&quot;error&quot;&gt;&amp;#91;5000&amp;#93;&lt;/span&gt; ms. Reason: [Association failed with &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.14.87:2550&amp;#93;&lt;/span&gt;] Caused by: &lt;span class=&quot;error&quot;&gt;&amp;#91;Connection refused: /10.29.14.87:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;But almost same time member 2 starts listening to 2550 I see these errors in member 1 and no more connection attempts are being sent:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:27:57,593 | INFO  | lt-dispatcher-48 | kka://opendaylight-cluster-data) | 41 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt; - Leader can currently not perform its duties, reachability status: [akka.tcp://opendaylight-cluster-data@10.29.12.157:2550 -&amp;gt; akka.tcp://opendaylight-cluster-data@10.29.14.87:2550: Unreachable &lt;span class=&quot;error&quot;&gt;&amp;#91;Unreachable&amp;#93;&lt;/span&gt; (1), akka.tcp://opendaylight-cluster-data@10.29.15.229:2550 -&amp;gt; akka.tcp://opendaylight-cluster-data@10.29.14.87:2550: Unreachable &lt;span class=&quot;error&quot;&gt;&amp;#91;Unreachable&amp;#93;&lt;/span&gt; (1)], member status: &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550 Up seen=true, akka.tcp://opendaylight-cluster-data@10.29.14.87:2550 Up seen=false, akka.tcp://opendaylight-cluster-data@10.29.15.229:2550 Up seen=true&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;2017-08-23 14:28:57,606 | INFO  | ult-dispatcher-4 | kka://opendaylight-cluster-data) | 41 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt; - Leader can currently not perform its duties, reachability status: [akka.tcp://opendaylight-cluster-data@10.29.12.157:2550 -&amp;gt; akka.tcp://opendaylight-cluster-data@10.29.14.87:2550: Unreachable &lt;span class=&quot;error&quot;&gt;&amp;#91;Unreachable&amp;#93;&lt;/span&gt; (1), akka.tcp://opendaylight-cluster-data@10.29.15.229:2550 -&amp;gt; akka.tcp://opendaylight-cluster-data@10.29.14.87:2550: Unreachable &lt;span class=&quot;error&quot;&gt;&amp;#91;Unreachable&amp;#93;&lt;/span&gt; (1)], member status: &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550 Up seen=true, akka.tcp://opendaylight-cluster-data@10.29.14.87:2550 Up seen=false, akka.tcp://opendaylight-cluster-data@10.29.15.229:2550 Up seen=true&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="52561" author="tpantelis" created="Thu, 24 Aug 2017 18:59:34 +0000"  >&lt;p&gt;&quot;Leader can currently not perform its duties&quot; is normal when a node becomes unreachable. By duties, it means it can&apos;t let any new nodes join the cluster until previously unreachable re-connect/re-join or are &quot;downed&quot; and removed from the cluster..&lt;/p&gt;

&lt;p&gt;On member-2, it restarted around 2017-08-23 14:27:45:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:28:22,849 | WARN  | lt-dispatcher-23 | JoinSeedNodeProcess              | 41 - com.typesafe.akka.slf4j - 2.4.18 | Couldn&apos;t join seed nodes after &lt;span class=&quot;error&quot;&gt;&amp;#91;2&amp;#93;&lt;/span&gt; attmpts, will try again. seed-nodes=&lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550, akka.tcp://opendaylight-cluster-data@10.29.15.229:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;This continued for 87 attempts until it apparently gave up:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:45:27,598 | WARN  | lt-dispatcher-21 | ReliableDeliverySupervisor       | 41 - com.typesafe.akka.slf4j - 2.4.18 | Association with remote system &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt; has failed, address is now gated for &lt;span class=&quot;error&quot;&gt;&amp;#91;5000&amp;#93;&lt;/span&gt; ms. Reason: &lt;span class=&quot;error&quot;&gt;&amp;#91;Disassociated&amp;#93;&lt;/span&gt; &lt;/p&gt;

&lt;p&gt;2017-08-23 14:45:46,975 | WARN  | lt-dispatcher-26 | NettyTransport                   | 41 - com.typesafe.akka.slf4j - 2.4.18 | Remote connection to null failed with java.net.ConnectException: Connection refused: /10.29.12.157:2550&lt;/p&gt;


&lt;p&gt;2017-08-23 14:45:46,983 | WARN  | ult-dispatcher-4 | ReliableDeliverySupervisor       | 41 - com.typesafe.akka.slf4j - 2.4.18 | Association with remote system &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt; has failed, address is now gated for &lt;span class=&quot;error&quot;&gt;&amp;#91;5000&amp;#93;&lt;/span&gt; ms. Reason: [Association failed with &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt;] Caused by: &lt;span class=&quot;error&quot;&gt;&amp;#91;Connection refused: /10.29.12.157:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Then it finally connects:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:45:58,855 | INFO  | ult-dispatcher-6 | kka://opendaylight-cluster-data) | 41 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.14.87:2550&amp;#93;&lt;/span&gt; - Welcome from &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt;&lt;br/&gt;
2017-08-23 14:45:58,864 | INFO  | rd-dispatcher-48 | ShardManager                     | 211 - org.opendaylight.controller.sal-distributed-datastore - 1.6.0 | shard-manager-config: Received MemberUp: memberName: MemberName&lt;/p&gt;
{name=member-1}
&lt;p&gt;, address: akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&lt;/p&gt;

&lt;p&gt;On member-1, it looks like 10.29.15.229 (ODL3 I assume) was also stopped:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:37:51,592 | WARN  | lt-dispatcher-20 | ClusterCoreDaemon                | 41 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt; - Marking node(s) as UNREACHABLE &lt;span class=&quot;error&quot;&gt;&amp;#91;Member(address = akka.tcp://opendaylight-cluster-data@10.29.15.229:2550, status = Up)&amp;#93;&lt;/span&gt;. Node roles &lt;span class=&quot;error&quot;&gt;&amp;#91;member-1&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;In that case, 2 nodes were down so both nodes have to re-connect or be &quot;downed&quot; before it allows any to join. The latter happened for ODL3:&lt;/p&gt;

&lt;p&gt;2017-08-23 14:40:18,419 | INFO  | lt-dispatcher-22 | kka://opendaylight-cluster-data) | 41 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.157:2550&amp;#93;&lt;/span&gt; - Marking unreachable node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.229:2550&amp;#93;&lt;/span&gt; as &lt;span class=&quot;error&quot;&gt;&amp;#91;Down&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Did the test manually down it?&lt;/p&gt;

&lt;p&gt;So it looks like the test stops 2 of the nodes. I&apos;m not sure what the expectations are.&lt;/p&gt;</comment>
                            <comment id="52562" author="ecelgp" created="Thu, 24 Aug 2017 19:17:31 +0000"  >&lt;p&gt;Hi Tom, you are right, we take down member-3 but that happens 10 mins after (14:37:42) we start member-2 (14:27:45). In our test we give max 5 mins for a member to boot up, join the cluster and load all features. Do you think this is not enough? because when the test passes it only takes 30 secs to go through the above.&lt;/p&gt;</comment>
                            <comment id="52563" author="tpantelis" created="Thu, 24 Aug 2017 19:46:06 +0000"  >&lt;p&gt;You can try giving it more time as it did eventually connect although it took over 15 min. Maybe there&apos;s something in the CSIT env (it gets busy?) where network communications get bogged down sometimes. member-2 was trying to continuously connect but was getting &quot;Connection refused&quot; for some reason until it finally connected.&lt;/p&gt;</comment>
                            <comment id="52564" author="ecelgp" created="Thu, 24 Aug 2017 20:07:06 +0000"  >&lt;p&gt;OK, I think as next step I can try to see if this reproduces outside CI.&lt;/p&gt;</comment>
                            <comment id="52565" author="rovarga" created="Thu, 24 Aug 2017 23:19:49 +0000"  >&lt;p&gt;Preliminary target set at Carbon SR3, pending netvirt CSIT (&lt;a href=&quot;https://jira.opendaylight.org/browse/CONTROLLER-1755&quot; title=&quot;RaftActor lastApplied index moves backwards&quot; class=&quot;issue-link&quot; data-issue-key=&quot;CONTROLLER-1755&quot;&gt;&lt;del&gt;CONTROLLER-1755&lt;/del&gt;&lt;/a&gt;) may become a blocker for SR2.&lt;/p&gt;</comment>
                            <comment id="52566" author="shague@redhat.com" created="Fri, 25 Aug 2017 11:26:26 +0000"  >&lt;p&gt;NetVirt is hitting this exact same issue. NetVirt tests copied the openflowplugin test pattern to take a node down and bring it back. Then wait 5 minutes. What I don&apos;t understand is why taking 1 node down out of the three leads to instability? We have three nodes in the cluster. Take 1 down leave other 2 alone. Attempt to bring back the 1 node, wait 5 minutes, that fails and now the cluster is in a bad state causing the further tests to fail.&lt;/p&gt;

&lt;p&gt;Sometimes there are random failures where a node does not come back properly such as in job &lt;span class=&quot;error&quot;&gt;&amp;#91;4&amp;#93;&lt;/span&gt;. We try to bring ODL1 back into the cluster but it fails to come back within 5 minutes. Then we move to the next tests and they fail. That ODL1 is hitting the below issue. Is there anything we can do to get past that? We can increase the timeout but why is the cluster in a bad shape? I don&apos;t think the infra is loaded since everything else is moving along properly - the robot vm is driving the other two nodes. We can also see odl1 restarting but taking it&apos;s time in the failing case.&lt;/p&gt;

&lt;p&gt;2017-08-25 02:02:38,430 | WARN  | saction-32-34&apos;}} | DeadlockMonitor                  | 126 - org.opendaylight.controller.config-manager - 0.6.2.SNAPSHOT | ModuleIdentifier&lt;/p&gt;
{factoryName=&apos;runtime-generated-mapping&apos;, instanceName=&apos;runtime-mapping-singleton&apos;}
&lt;p&gt; did not finish after 284864 ms&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;2&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://jenkins.opendaylight.org/releng/user/shague/my-views/view/3node/job/netvirt-csit-3node-openstack-ocata-gate-stateful-carbon/25/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jenkins.opendaylight.org/releng/user/shague/my-views/view/3node/job/netvirt-csit-3node-openstack-ocata-gate-stateful-carbon/25/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;3&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://jenkins.opendaylight.org/releng/user/shague/my-views/view/3node/job/netvirt-csit-3node-openstack-ocata-upstream-stateful-carbon/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jenkins.opendaylight.org/releng/user/shague/my-views/view/3node/job/netvirt-csit-3node-openstack-ocata-upstream-stateful-carbon/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;4&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/netvirt-csit-3node-openstack-ocata-gate-stateful-carbon/24/log.html.gz#s1-s1-t13-k2-k2-k8&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/netvirt-csit-3node-openstack-ocata-gate-stateful-carbon/24/log.html.gz#s1-s1-t13-k2-k2-k8&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52567" author="tpantelis" created="Fri, 25 Aug 2017 12:24:41 +0000"  >&lt;p&gt;I assume this is intermittent and not every run? Based on analysis of the controller test run logs that Luis provided, it&apos;s due to connectivity issues between the nodes for whatever reason. I would suggest manually running the tests outside CI and see if it reproduces as Luis noted. Or manually going thru the steps yourself.&lt;/p&gt;</comment>
                            <comment id="52588" author="ecelgp" created="Sun, 27 Aug 2017 22:06:59 +0000"  >&lt;p&gt;Attachment karaf-1.log has been added with description: Member 1 restarting&lt;/p&gt;</comment>
                            <comment id="52589" author="ecelgp" created="Sun, 27 Aug 2017 22:07:34 +0000"  >&lt;p&gt;Attachment karaf-2.log has been added with description: Member 2 cannot connect to member 1&lt;/p&gt;</comment>
                            <comment id="52590" author="ecelgp" created="Sun, 27 Aug 2017 22:08:23 +0000"  >&lt;p&gt;Attachment karaf-3.log has been added with description: Member 3 cannot connect to member 1&lt;/p&gt;</comment>
                            <comment id="52568" author="ecelgp" created="Sun, 27 Aug 2017 22:19:33 +0000"  >&lt;p&gt;I could reproduce the original issue in my local setup (see last 3 attached logs) after trying for 9 times killing/restarting owner instance. From the logs it seems like the restarting instance (member-1) can connect to the other 2 but these cannot connect back to member-1. &lt;/p&gt;

&lt;p&gt;I could also verify the restarting instance did not open the akka port:&lt;/p&gt;

&lt;p&gt;telnet 127.0.0.1 2550&lt;br/&gt;
Trying 127.0.0.1...&lt;br/&gt;
telnet: Unable to connect to remote host: Connection refused&lt;/p&gt;

&lt;p&gt;Even when you see in the log:&lt;/p&gt;

&lt;p&gt;2017-08-27 21:32:00,780 | INFO  | ult-dispatcher-2 | Remoting                         | 179 - com.typesafe.akka.slf4j - 2.4.18 | Remoting started; listening on addresses :&lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt;&lt;br/&gt;
2017-08-27 21:32:00,797 | INFO  | ult-dispatcher-2 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; - Starting up...&lt;br/&gt;
2017-08-27 21:32:00,873 | INFO  | ult-dispatcher-2 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; - Registered cluster JMX MBean &lt;span class=&quot;error&quot;&gt;&amp;#91;akka:type=Cluster&amp;#93;&lt;/span&gt;&lt;br/&gt;
2017-08-27 21:32:00,873 | INFO  | ult-dispatcher-2 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; - Started up successfully&lt;/p&gt;

&lt;p&gt;So this seems so far a cluster related issue rather than infra issue.&lt;/p&gt;</comment>
                            <comment id="52569" author="ecelgp" created="Sun, 27 Aug 2017 22:32:35 +0000"  >&lt;p&gt;This is the same issue that originated this bug and is collected in this run:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-carbon/733/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-carbon/733/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Same behavior: member-2 restarts at 2:25:07 and it is never &quot;akka&quot; reachable from the other members.&lt;/p&gt;</comment>
                            <comment id="52570" author="klou" created="Fri, 8 Sep 2017 02:21:59 +0000"  >&lt;p&gt;Tom, Can you look at the additional data Luis added and provide input as to what we can do about this problem?  As it stands, this issue will block the Nitrogen release.  Thanks!&lt;/p&gt;</comment>
                            <comment id="52571" author="tpantelis" created="Fri, 8 Sep 2017 11:48:22 +0000"  >&lt;p&gt;I&apos;ll take a look but I&apos;ve already looked at the other logs and can&apos;t ascertain why it can&apos;t connect - I don&apos;t know if it&apos;s an actual network issue or something fluky in akka. I don&apos;t anticipate this run being any different. Perhaps enabling akka debug may help. &lt;/p&gt;

&lt;p&gt;In one run I looked at it did eventually connect but not in time for the test. Luis mentioned he would increase the timeout.&lt;/p&gt;

&lt;p&gt;In another run, ODL1 couldn&apos;t connect to the other nodes so it joined itself. As I mentioned, we can try increasing the seed-node-timeout in the akka.conf for the tests. &lt;/p&gt;

&lt;p&gt;Either way I don&apos;t think this needs to block Nitrogen. There&apos;s nothing that could&apos;ve been introduced in Nitrogen that could&apos;ve caused this. As I&apos;ve mentioned, it may be the environment.&lt;/p&gt;</comment>
                            <comment id="52572" author="tpantelis" created="Fri, 8 Sep 2017 14:31:04 +0000"  >&lt;p&gt;(In reply to Luis Gomez from comment #21)&lt;br/&gt;
&amp;gt; I could reproduce the original issue in my local setup (see last 3 attached&lt;br/&gt;
&amp;gt; logs) after trying for 9 times killing/restarting owner instance. From the&lt;br/&gt;
&amp;gt; logs it seems like the restarting instance (member-1) can connect to the&lt;br/&gt;
&amp;gt; other 2 but these cannot connect back to member-1. &lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; I could also verify the restarting instance did not open the akka port:&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; telnet 127.0.0.1 2550&lt;br/&gt;
&amp;gt; Trying 127.0.0.1...&lt;br/&gt;
&amp;gt; telnet: Unable to connect to remote host: Connection refused&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; Even when you see in the log:&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; 2017-08-27 21:32:00,780 | INFO  | ult-dispatcher-2 | Remoting               &lt;br/&gt;
&amp;gt; | 179 - com.typesafe.akka.slf4j - 2.4.18 | Remoting started; listening on&lt;br/&gt;
&amp;gt; addresses :&lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; 2017-08-27 21:32:00,797 | INFO  | ult-dispatcher-2 |&lt;br/&gt;
&amp;gt; kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 |&lt;br/&gt;
&amp;gt; Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; -&lt;br/&gt;
&amp;gt; Starting up...&lt;br/&gt;
&amp;gt; 2017-08-27 21:32:00,873 | INFO  | ult-dispatcher-2 |&lt;br/&gt;
&amp;gt; kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 |&lt;br/&gt;
&amp;gt; Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; -&lt;br/&gt;
&amp;gt; Registered cluster JMX MBean &lt;span class=&quot;error&quot;&gt;&amp;#91;akka:type=Cluster&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; 2017-08-27 21:32:00,873 | INFO  | ult-dispatcher-2 |&lt;br/&gt;
&amp;gt; kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 |&lt;br/&gt;
&amp;gt; Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; -&lt;br/&gt;
&amp;gt; Started up successfully&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; So this seems so far a cluster related issue rather than infra issue.&lt;/p&gt;

&lt;p&gt;In this case, member-1 apparently couldn&apos;t connect to another node in time so joined itself about 23s later and formed a single-node island:&lt;/p&gt;

&lt;p&gt;2017-08-27 21:32:23,944 | INFO  | ult-dispatcher-5 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; - Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; is JOINING, roles &lt;span class=&quot;error&quot;&gt;&amp;#91;member-1&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Since member-1 is the first seed node, which has special semantics, it is allowed to do that. &lt;/p&gt;

&lt;p&gt;Looking at member-2, member-1 did initially connect and try to join but member-2 has to first &quot;down&quot; it and remove the old incarnation from the cluster before it allows the new incarnation to join, which it did:&lt;/p&gt;

&lt;p&gt;2017-08-27 21:32:01,291 | INFO  | ult-dispatcher-4 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.102:2550&amp;#93;&lt;/span&gt; - New incarnation of existing member &lt;span class=&quot;error&quot;&gt;&amp;#91;Member(address = akka.tcp://opendaylight-cluster-data@192.168.0.101:2550, status = Up)&amp;#93;&lt;/span&gt; is trying to join. Existing will be removed from the cluster and then new member will be allowed to join.&lt;br/&gt;
2017-08-27 21:32:01,292 | INFO  | ult-dispatcher-4 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.102:2550&amp;#93;&lt;/span&gt; - Marking unreachable node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; as &lt;span class=&quot;error&quot;&gt;&amp;#91;Down&amp;#93;&lt;/span&gt;&lt;br/&gt;
2017-08-27 21:32:02,376 | INFO  | ult-dispatcher-2 | kka://opendaylight-cluster-data) | 179 - com.typesafe.akka.slf4j - 2.4.18 | Cluster Node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.102:2550&amp;#93;&lt;/span&gt; &lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Leader is removing unreachable node &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;There&apos;s no other log activity for 192.168.0.101 until about 5 min later when akka reports &quot;Connection refused&quot; after member-1 was stopped again. &lt;/p&gt;

&lt;p&gt;2017-08-27 21:36:49,429 | INFO  | lt-dispatcher-22 | Remoting                         | 179 - com.typesafe.akka.slf4j - 2.4.18 | Quarantined address &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@192.168.0.101:2550&amp;#93;&lt;/span&gt; is still unreachable or has not been restarted. Keeping it quarantined.&lt;br/&gt;
2017-08-27 21:36:49,815 | WARN  | ult-dispatcher-4 | NettyTransport                   | 179 - com.typesafe.akka.slf4j - 2.4.18 | Remote connection to null failed with java.net.ConnectException: Connection refused: /192.168.0.101:2550&lt;/p&gt;

&lt;p&gt;From my understanding, member-1 should retry connection every second up to the seed-node-timeout (which we set to 12s by default) and member-2 should eventually let it join if it is able to connect. For some reason it wasn&apos;t able to reconnect/join within that period of time. &lt;/p&gt;

&lt;p&gt;Gary Wu saw this behavior a while back and opened an issue with akka &lt;a href=&quot;https://github.com/akka/akka/issues/18757/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/akka/akka/issues/18757/&lt;/a&gt;. It turned out the default akka seed-node-timeout of 5s was sometimes too low in his docker environment and increasing to 12s alleviated it and we made that our default. &lt;/p&gt;

&lt;p&gt;As Gary mentioned in his environment &quot;dnode0&apos;s startup creates a heavy load, so it&apos;s possible that dnode0 might be affecting the responsiveness of dnode1 and dnode2.&quot;. It may be that even 12s is sometimes too low in the jenkins test environment. I would suggest increasing it even more for the test, try 30s or even 60s.&lt;/p&gt;</comment>
                            <comment id="52573" author="tpantelis" created="Fri, 8 Sep 2017 15:19:26 +0000"  >&lt;p&gt;(In reply to Luis Gomez from comment #22)&lt;br/&gt;
&amp;gt; This is the same issue that originated this bug and is collected in this run:&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-&lt;/a&gt;&lt;br/&gt;
&amp;gt; clustering-only-carbon/733/&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; Same behavior: member-2 restarts at 2:25:07 and it is never &quot;akka&quot; reachable&lt;br/&gt;
&amp;gt; from the other members.&lt;/p&gt;

&lt;p&gt;In this case member-2 is not the first seed node so it has to join with another and cannot join itself. We see that it tried to connect/join with another node 23 times over 5 min until apparently robot shut it down:&lt;/p&gt;

&lt;p&gt;2017-08-14 02:30:08,093 | WARN  | ult-dispatcher-5 | JoinSeedNodeProcess              | 171 - com.typesafe.akka.slf4j - 2.4.18 | Couldn&apos;t join seed nodes after &lt;span class=&quot;error&quot;&gt;&amp;#91;23&amp;#93;&lt;/span&gt; attempts, will try again. seed-nodes=&lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.94:2550, akka.tcp://opendaylight-cluster-data@10.29.15.142:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;On member-1, during that time we see it try to connect to member-2 every 5s but fails with &quot;Connection refused&quot;:&lt;/p&gt;

&lt;p&gt;2017-08-14 02:25:15,880 | WARN  | lt-dispatcher-23 | ReliableDeliverySupervisor       | 171 - com.typesafe.akka.slf4j - 2.4.18 | Association with remote system &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.200:2550&amp;#93;&lt;/span&gt; has failed, address is now gated for &lt;span class=&quot;error&quot;&gt;&amp;#91;5000&amp;#93;&lt;/span&gt; ms. Reason: [Association failed with &lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.12.200:2550&amp;#93;&lt;/span&gt;] Caused by: &lt;span class=&quot;error&quot;&gt;&amp;#91;Connection refused: /10.29.12.200:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;So it appears neither side was able to connect. It seems to me the environment was overloaded unless there&apos;s some bug in akka which is always possible but just doesn&apos;t seem likely as establishing TCP connections is pretty basic and &quot;Connection refused&quot; emanates from the lower-level TCP/socket layer. In a previous run we saw a node take about 17 min before it finally connected as I noted earlier.&lt;/p&gt;</comment>
                            <comment id="52574" author="ecelgp" created="Fri, 8 Sep 2017 16:08:23 +0000"  >&lt;p&gt;Yes, I believe it is a TCP socket problem as when I reproduced locally the TCP port 2550 never came up for the restarting instance. It could be AKKA issue as I guess this library is ultimately opening/using the port. Tom, is there any DEBUG to troubleshoot this more?&lt;/p&gt;</comment>
                            <comment id="52575" author="tpantelis" created="Fri, 8 Sep 2017 16:38:08 +0000"  >&lt;p&gt;(In reply to Luis Gomez from comment #27)&lt;br/&gt;
&amp;gt; Yes, I believe it is a TCP socket problem as when I reproduced locally the&lt;br/&gt;
&amp;gt; TCP port 2550 never came up for the restarting instance. It could be AKKA&lt;br/&gt;
&amp;gt; issue as I guess this library is ultimately opening/using the port. Tom, is&lt;br/&gt;
&amp;gt; there any DEBUG to troubleshoot this more?&lt;/p&gt;

&lt;p&gt;Akka reports it successfully opened the port and I doubt there could be a bug there - that&apos;s pretty basic and I&apos;ve never seen an issue with that. Maybe it could be the networking layer in the VM environment is overloaded or threads/timers get delayed...&lt;/p&gt;

&lt;p&gt;The fact that akka keeps trying to periodically connect over minutes seems less likely an issue with akka, at least to me. I could see maybe a timing bug or something causing one attempt to fail but many ...?&lt;/p&gt;

&lt;p&gt;You can set akka.loglevel=DEBUG in the akka.conf and also&lt;br/&gt;
&lt;a href=&quot;http://doc.akka.io/docs/akka/2.4.0/scala/logging.html#Auxiliary_remote_logging_options&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://doc.akka.io/docs/akka/2.4.0/scala/logging.html#Auxiliary_remote_logging_options&lt;/a&gt; as was instructed in &lt;a href=&quot;https://github.com/akka/akka/issues/18757/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/akka/akka/issues/18757/&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="52576" author="ecelgp" created="Mon, 11 Sep 2017 19:29:22 +0000"  >&lt;p&gt;OK, I tried to reproduce this one in my local setup and this time it did not fail for the many times I tried. So this puts the issue back to the CI where it is happening sporadically but still after few times. Because of this I think we can downgrade this to CRITICAL for now.&lt;/p&gt;</comment>
                            <comment id="52577" author="ecelgp" created="Thu, 21 Sep 2017 16:54:52 +0000"  >&lt;p&gt;FYI this issue is also present in Boron so all branches:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-periodic-bulkomatic-clustering-daily-only-boron/411/log.html.gz#s1-s3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-periodic-bulkomatic-clustering-daily-only-boron/411/log.html.gz#s1-s3&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Unfortunately it is difficult to reproduce outside ODL CI, so we cannot discard test env problem.&lt;/p&gt;</comment>
                            <comment id="52578" author="vrpolak" created="Thu, 21 Sep 2017 17:19:41 +0000"  >&lt;p&gt;Some time ago my favorite explanation was that there is a subtle bug in Blueprint definitions somewhere in clustering features, which needs a specific timing to result in this Bug.&lt;/p&gt;

&lt;p&gt;If that is the case (and we still have trouble reproducing the failure reliably in CSIT), perhaps it would be a good idea to fix other Bugs, hoping this would get fixed together with the other bug.&lt;/p&gt;

&lt;p&gt;SingleFeatureTest should be verifying blueprint, but some features are blacklisted, some of them related to clustering. Specifically, Odlparent 2.0.4 skips &lt;span class=&quot;error&quot;&gt;&amp;#91;8&amp;#93;&lt;/span&gt;: odl-mdsal-broker-local, odl-mdsal-clustering-commons, odl-mdsal-distributed-datastore, and odl-mdsal-remoterpc-connector.&lt;/p&gt;

&lt;p&gt;(This list is strangely similar to features affected by &lt;span class=&quot;error&quot;&gt;&amp;#91;9&amp;#93;&lt;/span&gt;, no idea if there is a causal relation.)&lt;/p&gt;

&lt;p&gt;There is already &lt;a href=&quot;https://jira.opendaylight.org/browse/CONTROLLER-1584&quot; title=&quot;Fix broken controller features failing the new extended SingleFeatureTest incl. TestBundleDiag due to IllegalStateException: ./configuration/initial/akka.conf is missing&quot; class=&quot;issue-link&quot; data-issue-key=&quot;CONTROLLER-1584&quot;&gt;&lt;del&gt;CONTROLLER-1584&lt;/del&gt;&lt;/a&gt; opened for not skipping those features, so perhaps we should attempt to tackle &lt;a href=&quot;https://jira.opendaylight.org/browse/CONTROLLER-1584&quot; title=&quot;Fix broken controller features failing the new extended SingleFeatureTest incl. TestBundleDiag due to IllegalStateException: ./configuration/initial/akka.conf is missing&quot; class=&quot;issue-link&quot; data-issue-key=&quot;CONTROLLER-1584&quot;&gt;&lt;del&gt;CONTROLLER-1584&lt;/del&gt;&lt;/a&gt; (again) before getting back to this? I recall the SFT failures on those features were quite reliable.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;8&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://github.com/opendaylight/odlparent/blob/v2.0.4/features-test/src/main/java/org/opendaylight/odlparent/featuretest/SingleFeatureTest.java#L404-L409&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/opendaylight/odlparent/blob/v2.0.4/features-test/src/main/java/org/opendaylight/odlparent/featuretest/SingleFeatureTest.java#L404-L409&lt;/a&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;9&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://git.opendaylight.org/gerrit/63329&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/63329&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52579" author="tpantelis" created="Thu, 21 Sep 2017 21:18:30 +0000"  >&lt;p&gt;This is an issue with akka sometimes not being able to connect to other nodes or, at least, it takes longer than expected. From all the logs I&apos;ve looked at it appears to be at the TCP level. It is not related to blueprint wiring or anything in ODL code. As Luis mentioned, it is also present in Boron which indicates it is not tied to any akka version or the karaf 4 upgrade. From everything I can tell, it seems related to the CSIT env, ie intermittent load. I would suggest adjusting time outs to take that into account.&lt;/p&gt;</comment>
                            <comment id="52580" author="ecelgp" created="Wed, 27 Sep 2017 00:25:14 +0000"  >&lt;p&gt;OK, I am trying to reproduce the issue in sandbox so I can apply some of the suggested cluster configurations but so far I am also having trouble to reproduce there &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.opendaylight.org/images/icons/emoticons/sad.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="52581" author="vrpolak" created="Thu, 28 Sep 2017 12:48:02 +0000"  >&lt;p&gt;&amp;gt; akka sometimes not being able to connect to other nodes or,&lt;br/&gt;
&amp;gt; at least, it takes longer than expected.&lt;/p&gt;

&lt;p&gt;Is there a logging setting which would enable us to compare such connect timings (without spamming karaf.log too much)?&lt;br/&gt;
Possibly without additional changes such as &lt;span class=&quot;error&quot;&gt;&amp;#91;10&amp;#93;&lt;/span&gt;?&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;10&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://git.opendaylight.org/gerrit/60727&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/60727&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52582" author="ecelgp" created="Thu, 12 Oct 2017 08:17:19 +0000"  >&lt;p&gt;After adding some network connection printouts (netstat) in the test, I found a pattern for the failure: Every time a restarting member fails to join I can see there are 2 connection (inbound and outbound) established between the restarting member and the same remote member (see 2 first lines below):&lt;/p&gt;

&lt;p&gt;tcp6       0      0 10.29.15.190:2550       10.29.15.107:58300      ESTABLISHED 12020/java          &lt;br/&gt;
tcp6       0      0 10.29.15.190:44008      10.29.15.107:2550       ESTABLISHED 12020/java&lt;/p&gt;

&lt;p&gt;tcp6       0      0 10.29.15.190:47210      10.29.15.113:2550       ESTABLISHED 12020/java          &lt;/p&gt;

&lt;p&gt;This is weird as normally 1 connection (inbound or outbound) is enough to setup the akka association. I am still investigating and adding more printouts to understand how this double connection gets generated.&lt;/p&gt;</comment>
                            <comment id="52583" author="ecelgp" created="Wed, 18 Oct 2017 01:25:19 +0000"  >&lt;p&gt;Hi Tom,&lt;/p&gt;

&lt;p&gt;From the netstat printouts for a recent failure:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-oxygen/19/log.html.gz#s1-s6&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-oxygen/19/log.html.gz#s1-s6&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;It seems clear there is a relation between duplicated akka connections and the issue:&lt;/p&gt;

&lt;p&gt;Failing restarting instance (member-1):&lt;br/&gt;
tcp6       0      0 10.29.15.220:2550       :::*                    LISTEN      18273/java          &lt;br/&gt;
tcp6       0      0 10.29.15.220:46724      10.29.14.239:2550       ESTABLISHED 18273/java          &lt;br/&gt;
tcp6       0      0 10.29.15.220:2550       10.29.13.230:33632      ESTABLISHED 18273/java          &lt;br/&gt;
tcp6       0      0 10.29.15.220:2550       10.29.14.239:49522      ESTABLISHED 18273/java          &lt;br/&gt;
tcp6       0      0 10.29.15.220:57166      10.29.13.230:2550       ESTABLISHED 18273/java          &lt;/p&gt;

&lt;p&gt;Other instance (member-2):&lt;br/&gt;
tcp6       0      0 10.29.13.230:2550       :::*                    LISTEN      16951/java          &lt;br/&gt;
tcp6       0      0 10.29.13.230:2550       10.29.15.220:57166      ESTABLISHED 16951/java          &lt;br/&gt;
tcp6       0    428 10.29.13.230:46226      10.29.14.239:2550       ESTABLISHED 16951/java          &lt;br/&gt;
tcp6       0      0 10.29.13.230:33632      10.29.15.220:2550       ESTABLISHED 16951/java &lt;/p&gt;

&lt;p&gt;Other instance (member-3):&lt;br/&gt;
tcp6       0      0 10.29.14.239:2550       :::*                    LISTEN      16693/java          &lt;br/&gt;
tcp6       0    450 10.29.14.239:2550       10.29.13.230:46226      ESTABLISHED 16693/java          &lt;br/&gt;
tcp6       0      0 10.29.14.239:2550       10.29.15.220:46724      ESTABLISHED 16693/java          &lt;br/&gt;
tcp6       0      0 10.29.14.239:49522      10.29.15.220:2550       ESTABLISHED 16693/java&lt;/p&gt;

&lt;p&gt;Also looking at the logs it seems member-2 and member-3 try to connect the restarting instance at the same time the tcp socket gets available in member-1:&lt;/p&gt;

&lt;p&gt;2017-10-17 09:45:26,352 | INFO  | ult-dispatcher-2 | Remoting                         | 41 - com.typesafe.akka.slf4j - 2.4.18 | Remoting started; listening on addresses :&lt;span class=&quot;error&quot;&gt;&amp;#91;akka.tcp://opendaylight-cluster-data@10.29.15.220:2550&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;So is there any possibility of a race condition in the restarting instance so that it accepts both connections from member-2 &amp;amp; member-3 and at the same time creates different connections to member-2 and member-3 to join the cluster? in such scenario is it possible member-1 sends and expects messages from one connection while member-2 and member-3 sends and expects messages from other connection?&lt;/p&gt;</comment>
                            <comment id="52584" author="tpantelis" created="Wed, 18 Oct 2017 02:17:06 +0000"  >&lt;p&gt;I don&apos;t know. I haven&apos;t studied akka code to that detail. I would suggest opening a case with akka to answer your questions.&lt;/p&gt;</comment>
                            <comment id="52585" author="ecelgp" created="Wed, 18 Oct 2017 06:23:53 +0000"  >&lt;p&gt;Well, it is not yet clear whether the double connection is the origin or a consequence of the issue. I think next step will be to enable some akka debug in CSIT. In my local setup I changed the akka.conf as instructed:&lt;/p&gt;

&lt;p&gt;    loglevel = &quot;DEBUG&quot;&lt;br/&gt;
    log-config-on-start = on&lt;br/&gt;
    actor {&lt;br/&gt;
      debug &lt;/p&gt;
{
        autoreceive = on
        lifecycle = on
        unhandled = on
        fsm = on
      }
&lt;p&gt;    }&lt;/p&gt;

&lt;p&gt;But I do not see any DEBUG message in karaf.log if this is the place where akka log is dumped, maybe I need to enable some DEBUG in the karaf logger itself, any idea of what is missing?&lt;/p&gt;</comment>
                            <comment id="52586" author="vrpolak" created="Wed, 18 Oct 2017 10:22:08 +0000"  >&lt;p&gt;&amp;gt; I need to enable some DEBUG in the karaf logger itself&lt;/p&gt;

&lt;p&gt;Yes. See &lt;span class=&quot;error&quot;&gt;&amp;#91;12&amp;#93;&lt;/span&gt;.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;12&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://git.opendaylight.org/gerrit/#/c/58933/14/jjb/integration/integration-configure-clustering.sh@95&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/#/c/58933/14/jjb/integration/integration-configure-clustering.sh@95&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="59724" author="ecelgp" created="Thu, 19 Oct 2017 04:26:19 +0000"  >&lt;p&gt;Thanks Vratko, it works but unfortunately the akka messaging debug is very chatty so I will start with some other debug and see what happens.&lt;/p&gt;</comment>
                            <comment id="59879" author="ecelgp" created="Wed, 1 Nov 2017 19:50:31 +0000"  >&lt;p&gt;After adding the DEBUGs, I can confirm issue happens when the restarting member and the cluster leader initiates AKKA connection at the same time. The restarting member initiates the connection as part of the boot while the cluster leader is trying every 5 secs. Can we do something in our code to handle this conflict? if not I guess I will open an issue in akka. Tom, which version of akka are we using in ODL?&lt;/p&gt;</comment>
                            <comment id="59880" author="ecelgp" created="Wed, 1 Nov 2017 20:18:24 +0000"  >&lt;p&gt;Node 3 (restarting member) log in &lt;span class=&quot;error&quot;&gt;&amp;#91;1&amp;#93;&lt;/span&gt;:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
2017-10-31 22:47:55,553 | DEBUG | lt-dispatcher-25 | AkkaProtocolManager              | 41 - com.typesafe.akka.slf4j - 2.4.18 | now supervising Actor[akka:&lt;span class=&quot;code-comment&quot;&gt;//opendaylight-cluster-data/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2Fopendaylight-cluster-data%4010.29.14.65%3A2550-1#175430158]
&lt;/span&gt;2017-10-31 22:47:55,554 | DEBUG | lt-dispatcher-25 | AkkaProtocolManager              | 41 - com.typesafe.akka.slf4j - 2.4.18 | now supervising Actor[akka:&lt;span class=&quot;code-comment&quot;&gt;//opendaylight-cluster-data/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2Fopendaylight-cluster-data%4010.29.13.208%3A2550-2#-2037739644]
&lt;/span&gt;2017-10-31 22:47:55,590 | INFO  | rint Extender: 3 | AbstractDataStore                | 217 - org.opendaylight.controller.sal-distributed-datastore - 1.6.1.SNAPSHOT | Creating ShardManager : shardmanager-config
2017-10-31 22:47:55,596 | DEBUG | ult-dispatcher-2 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | started (akka.remote.transport.ProtocolStateActor@35fbf1c1)
2017-10-31 22:47:55,597 | DEBUG | ult-dispatcher-2 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | started (akka.remote.transport.ProtocolStateActor@519a5d1b)
2017-10-31 22:47:55,597 | DEBUG | ult-dispatcher-2 | AkkaProtocolManager              | 41 - com.typesafe.akka.slf4j - 2.4.18 | now supervising Actor[akka:&lt;span class=&quot;code-comment&quot;&gt;//opendaylight-cluster-data/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2Fopendaylight-cluster-data%4010.29.14.65%3A59194-3#1453714731]
&lt;/span&gt;2017-10-31 22:47:55,597 | DEBUG | ult-dispatcher-2 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | started (akka.remote.transport.ProtocolStateActor@76642d3b)
2017-10-31 22:47:55,601 | DEBUG | lt-dispatcher-25 | AkkaProtocolManager              | 41 - com.typesafe.akka.slf4j - 2.4.18 | now supervising Actor[akka:&lt;span class=&quot;code-comment&quot;&gt;//opendaylight-cluster-data/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2Fopendaylight-cluster-data%4010.29.13.208%3A42796-4#562821358]
&lt;/span&gt;2017-10-31 22:47:55,601 | DEBUG | lt-dispatcher-25 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | started (akka.remote.transport.ProtocolStateActor@79b35ebe)
2017-10-31 22:47:55,675 | DEBUG | lt-dispatcher-25 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | stopped
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Node 2 (cluster leader) log in &lt;span class=&quot;error&quot;&gt;&amp;#91;2&amp;#93;&lt;/span&gt;:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
2017-10-31 22:47:55,587 | DEBUG | ult-dispatcher-6 | AkkaProtocolManager              | 41 - com.typesafe.akka.slf4j - 2.4.18 | now supervising Actor[akka:&lt;span class=&quot;code-comment&quot;&gt;//opendaylight-cluster-data/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2Fopendaylight-cluster-data%4010.29.15.87%3A2550-6#-1931382701]
&lt;/span&gt;2017-10-31 22:47:55,587 | DEBUG | ult-dispatcher-6 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | started (akka.remote.transport.ProtocolStateActor@6a4fb40c)
2017-10-31 22:47:55,595 | DEBUG | ult-dispatcher-5 | AkkaProtocolManager              | 41 - com.typesafe.akka.slf4j - 2.4.18 | now supervising Actor[akka:&lt;span class=&quot;code-comment&quot;&gt;//opendaylight-cluster-data/system/transports/akkaprotocolmanager.tcp0/akkaProtocol-tcp%3A%2F%2Fopendaylight-cluster-data%4010.29.15.87%3A53374-7#691018071]
&lt;/span&gt;2017-10-31 22:47:55,596 | DEBUG | ult-dispatcher-5 | ProtocolStateActor               | 41 - com.typesafe.akka.slf4j - 2.4.18 | started (akka.remote.transport.ProtocolStateActor@5146512)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/208/odl3_karaf.log.gz&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/208/odl3_karaf.log.gz&lt;/a&gt;&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;2&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/208/odl2_karaf.log.gz&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/openflowplugin-csit-3node-clustering-only-nitrogen/208/odl2_karaf.log.gz&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="60011" author="ecelgp" created="Fri, 10 Nov 2017 01:58:29 +0000"  >&lt;p&gt;FYI I opened this issue in AKKA:&lt;br/&gt;
&lt;a href=&quot;https://github.com/akka/akka/issues/23959&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/akka/akka/issues/23959&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="62755" author="opendaylight.release" created="Thu, 3 May 2018 09:38:25 +0000"  >&lt;p&gt;Since the bug is unassigned I&apos;m currently assigning it to you.&lt;/p&gt;

&lt;p&gt;Please assign to the relevant person.&#160;&lt;/p&gt;</comment>
                            <comment id="62757" author="rovarga" created="Thu, 3 May 2018 09:44:20 +0000"  >&lt;p&gt;Oxygen SR1 and Fluorine are using akka_2.12-2.5.11, scala 2.12.5.&lt;/p&gt;</comment>
                            <comment id="62803" author="ecelgp" created="Thu, 3 May 2018 17:02:27 +0000"  >&lt;p&gt;This was identified as an AKKA issue, I will recheck on mentioned branches.&lt;/p&gt;</comment>
                            <comment id="63097" author="ecelgp" created="Thu, 24 May 2018 02:02:43 +0000"  >&lt;p&gt;This seems like the issue is fixed after last AKKA update:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jenkins.opendaylight.org/releng/view/CSIT-3node/job/openflowplugin-csit-3node-clustering-bulkomatic-only-oxygen/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jenkins.opendaylight.org/releng/view/CSIT-3node/job/openflowplugin-csit-3node-clustering-bulkomatic-only-oxygen/&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10002">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="26308">CONTROLLER-1754</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="13677" name="karaf-1.log" size="284168" author="ecelgp@gmail.com" created="Sun, 27 Aug 2017 22:06:59 +0000"/>
                            <attachment id="13678" name="karaf-2.log" size="43297" author="ecelgp@gmail.com" created="Sun, 27 Aug 2017 22:07:34 +0000"/>
                            <attachment id="13679" name="karaf-3.log" size="43144" author="ecelgp@gmail.com" created="Sun, 27 Aug 2017 22:08:23 +0000"/>
                            <attachment id="13676" name="karaf_member_restart.txt" size="137367" author="ecelgp@gmail.com" created="Thu, 17 Aug 2017 17:51:43 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                            <customfield id="customfield_11400" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10208" key="com.atlassian.jira.plugin.system.customfieldtypes:textfield">
                        <customfieldname>External issue ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9006</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10201" key="com.atlassian.jira.plugin.system.customfieldtypes:url">
                        <customfieldname>External issue URL</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[https://bugs.opendaylight.org/show_bug.cgi?id=9006]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10206" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Issue Type</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10300"><![CDATA[Bug]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10204" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>ODL SR Target Milestone</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10384"><![CDATA[Carbon-SR3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10000" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i02si7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>