<!-- 
RSS generated by JIRA (8.20.10#820010-sha1:ace47f9899e9ee25d7157d59aa17ab06aee30d3d) at Wed Feb 07 19:55:49 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>OpenDaylight JIRA</title>
    <link>https://jira.opendaylight.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>8.20.10</version>
        <build-number>820010</build-number>
        <build-date>22-06-2022</build-date>
    </build-info>


<item>
            <title>[CONTROLLER-1546] Operations Failed after Failover with exceptions and Errors</title>
                <link>https://jira.opendaylight.org/browse/CONTROLLER-1546</link>
                <project id="10113" key="CONTROLLER">controller</project>
                    <description>&lt;p&gt;Image used RC3.3: &lt;a href=&quot;https://nexus.opendaylight.org/content/repositories/autorelease-1484/org/opendaylight/integration/distribution-karaf/0.5.0-Boron/distribution-karaf-0.5.0-Boron.zip&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://nexus.opendaylight.org/content/repositories/autorelease-1484/org/opendaylight/integration/distribution-karaf/0.5.0-Boron/distribution-karaf-0.5.0-Boron.zip&lt;/a&gt;&lt;/p&gt;


&lt;p&gt;Operations:&lt;/p&gt;

&lt;p&gt; 1. Setup 3 node ODL (feature: odl-ovsdb-openstack aka Legacy Netvirt)&lt;/p&gt;

&lt;p&gt; 2. Attempted failover in this sequence&lt;/p&gt;

&lt;p&gt;   ODL1 down ODL2 up ODL3 up&lt;/p&gt;

&lt;p&gt;   ODL1 up, ODL2 down ODL3 up&lt;/p&gt;

&lt;p&gt; --&amp;gt; Operations started to fail here&lt;/p&gt;

&lt;p&gt;  ODL1 up, ODL2 up, ODL3 up&lt;/p&gt;

&lt;p&gt;  --&amp;gt; no recovery  &lt;/p&gt;

&lt;p&gt;       At this point ODL3 was listed as &quot;owner&quot; for all entities. All the operations were  failing. Checking the logs in ODL2 and ODL3 indicated a lot of blueprint errors and some WARN indicating problems with remoterpc&lt;/p&gt;

&lt;p&gt;2016-09-09 21:48:21,009 | WARN  | ult-dispatcher-2 | RpcRegistry                      | 168 - org.opendaylight.controller.sal-remoterpc-connector - 1.4.0.Boron | Timed out finding routers for RouteIdentifierImpl{context=null, type=(urn:opendaylight:packet:service?revision=2013-07-09)transmit-packet, route=/(urn:opendaylight:inventory?revision=2013-08-19)nodes/node/node[&lt;/p&gt;
{(urn:opendaylight:inventory?revision=2013-08-19)id=openflow:92945849353687}
&lt;p&gt;]}&lt;br/&gt;
2016-09-09 21:48:21,009 | WARN  | ult-dispatcher-2 | RpcRegistry                      | 168 - org.opendaylight.controller.sal-remoterpc-connector - 1.4.0.Boron | Timed out finding routers for RouteIdentifierImpl{context=null, type=(urn:opendaylight:packet:service?revision=2013-07-09)transmit-packet, route=/(urn:opendaylight:inventory?revision=2013-08-19)nodes/node/node[&lt;/p&gt;
{(urn:opendaylight:inventory?revision=2013-08-19)id=openflow:92945849353687}
&lt;p&gt;]}&lt;br/&gt;
2016-09-09 21:48:21,009 | WARN  | ult-dispatcher-2 | RpcRegistry                      | 168 - org.opendaylight.controller.sal-remoterpc-connector - 1.4.0.Boron | Timed out finding routers for RouteIdentifierImpl{context=null, type=(urn:opendaylight:packet:service?revision=2013-07-09)transmit-packet, route=/(urn:opendaylight:inventory?revision=2013-08-19)nodes/node/node[&lt;/p&gt;
{(urn:opendaylight:inventory?revision=2013-08-19)id=openflow:92945849353687}
&lt;p&gt;]}&lt;/p&gt;</description>
                <environment>&lt;p&gt;Operating System: All&lt;br/&gt;
Platform: All&lt;/p&gt;</environment>
        <key id="26100">CONTROLLER-1546</key>
            <summary>Operations Failed after Failover with exceptions and Errors</summary>
                <type id="10104" iconUrl="https://jira.opendaylight.org/secure/viewavatar?size=xsmall&amp;avatarId=10303&amp;avatarType=issuetype">Bug</type>
                                                <status id="5" iconUrl="https://jira.opendaylight.org/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10003">Cannot Reproduce</resolution>
                                        <assignee username="-1">Unassigned</assignee>
                                    <reporter username="gvrangan">Venkatrangan Govindarajan</reporter>
                        <labels>
                    </labels>
                <created>Fri, 9 Sep 2016 21:48:41 +0000</created>
                <updated>Tue, 25 Jul 2023 08:24:12 +0000</updated>
                            <resolved>Tue, 11 Oct 2016 20:53:24 +0000</resolved>
                                                                    <component>clustering</component>
                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                                                                <comments>
                            <comment id="51512" author="gvrangan" created="Fri, 9 Sep 2016 22:14:10 +0000"  >&lt;p&gt;Attachment odl1_log.tgz has been added with description: ODL1 Logs&lt;/p&gt;</comment>
                            <comment id="51513" author="gvrangan" created="Fri, 9 Sep 2016 22:14:31 +0000"  >&lt;p&gt;Attachment odl2_log.tgz has been added with description: ODL2 logs&lt;/p&gt;</comment>
                            <comment id="51514" author="gvrangan" created="Fri, 9 Sep 2016 22:15:50 +0000"  >&lt;p&gt;Attachment odl3_log.tgz has been added with description: ODL3 logs&lt;/p&gt;</comment>
                            <comment id="51505" author="gvrangan" created="Fri, 9 Sep 2016 22:16:23 +0000"  >&lt;p&gt;Logs uploaded. Please check the blueprint errors in ODL2 and remoterpc errors in ODL3.&lt;/p&gt;</comment>
                            <comment id="51506" author="gvrangan" created="Fri, 9 Sep 2016 22:45:51 +0000"  >&lt;p&gt;ODL1 log has some entries with &quot;NullPointer Exception&quot; during startup, mostly when invoking entity ownership API.&lt;/p&gt;</comment>
                            <comment id="51507" author="gvrangan" created="Mon, 12 Sep 2016 15:15:35 +0000"  >&lt;p&gt;Could not reproduce the failure. But still the ERROR and exceptions are seen in the log&lt;/p&gt;</comment>
                            <comment id="51508" author="tpantelis" created="Fri, 16 Sep 2016 14:50:02 +0000"  >&lt;p&gt;There are many NPE&apos;s in ForwardingRulesManagerImpl:&lt;/p&gt;

&lt;p&gt;2016-09-09 20:39:10,864 | ERROR | on-dispatcher-31 | DataTreeChangeListenerActor      | 170 - org.opendaylight.controller.sal-distributed-datastore - 1.4.0.Boron | Error notifying listener org.opendaylight.controller.md.sal.binding.impl.BindingClusteredDOMDataTreeChangeListenerAdapter@2a4f203b&lt;br/&gt;
java.lang.NullPointerException&lt;br/&gt;
        at org.opendaylight.openflowplugin.applications.frm.impl.ForwardingRulesManagerImpl.isNodeOwner(ForwardingRulesManagerImpl.java:261)&lt;span class=&quot;error&quot;&gt;&amp;#91;281:org.opendaylight.openflowplugin.applications.forwardingrules-manager:0.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.openflowplugin.applications.frm.impl.AbstractListeningCommiter.preConfigurationCheck(AbstractListeningCommiter.java:111)&lt;span class=&quot;error&quot;&gt;&amp;#91;281:org.opendaylight.openflowplugin.applications.forwardingrules-manager:0.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.openflowplugin.applications.frm.impl.AbstractListeningCommiter.onDataTreeChanged(AbstractListeningCommiter.java:53)&lt;span class=&quot;error&quot;&gt;&amp;#91;281:org.opendaylight.openflowplugin.applications.forwardingrules-manager:0.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.controller.md.sal.binding.impl.BindingDOMDataTreeChangeListenerAdapter.onDataTreeChanged(BindingDOMDataTreeChangeListenerAdapter.java:41)&lt;span class=&quot;error&quot;&gt;&amp;#91;141:org.opendaylight.controller.sal-binding-broker-impl:1.4.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.controller.cluster.datastore.DataTreeChangeListenerActor.dataChanged(DataTreeChangeListenerActor.java:55)&lt;span class=&quot;error&quot;&gt;&amp;#91;170:org.opendaylight.controller.sal-distributed-datastore:1.4.0.Boron&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;Also a few in NeutronNetworkChangeListener:&lt;/p&gt;

&lt;p&gt;2016-09-09 20:39:12,906 | ERROR | on-dispatcher-38 | DataChangeListener               | 170 - org.opendaylight.controller.sal-distributed-datastore - 1.4.0.Boron | Error notifying listener org.opendaylight.netvirt.openstack.netvirt.translator.iaware.impl.NeutronNetworkChangeListener&lt;br/&gt;
java.lang.NullPointerException&lt;br/&gt;
        at com.google.common.base.Preconditions.checkNotNull(Preconditions.java:210)&lt;span class=&quot;error&quot;&gt;&amp;#91;38:com.google.guava:18.0.0&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.netvirt.openstack.netvirt.AbstractHandler.enqueueEvent(AbstractHandler.java:69)&lt;span class=&quot;error&quot;&gt;&amp;#91;290:org.opendaylight.netvirt.openstack.net-virt:1.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.netvirt.openstack.netvirt.NetworkHandler.neutronNetworkCreated(NetworkHandler.java:68)&lt;span class=&quot;error&quot;&gt;&amp;#91;290:org.opendaylight.netvirt.openstack.net-virt:1.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.netvirt.openstack.netvirt.translator.iaware.impl.NeutronNetworkChangeListener.createNetwork(NeutronNetworkChangeListener.java:87)&lt;span class=&quot;error&quot;&gt;&amp;#91;290:org.opendaylight.netvirt.openstack.net-virt:1.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.netvirt.openstack.netvirt.translator.iaware.impl.NeutronNetworkChangeListener.onDataChanged(NeutronNetworkChangeListener.java:74)&lt;span class=&quot;error&quot;&gt;&amp;#91;290:org.opendaylight.netvirt.openstack.net-virt:1.3.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.controller.md.sal.binding.impl.AbstractForwardedDataBroker$TranslatingDataChangeInvoker.onDataChanged(AbstractForwardedDataBroker.java:143)&lt;span class=&quot;error&quot;&gt;&amp;#91;141:org.opendaylight.controller.sal-binding-broker-impl:1.4.0.Boron&amp;#93;&lt;/span&gt;&lt;br/&gt;
        at org.opendaylight.controller.cluster.datastore.DataChangeListener.dataChanged(DataChangeListener.java:71)&lt;span class=&quot;error&quot;&gt;&amp;#91;170:org.opendaylight.controller.sal-distributed-datastore:1.4.0.Boron&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;These should be looked at by someone familiar with that code. I don&apos;t know the impact.&lt;/p&gt;

&lt;p&gt;On ODL3, I see a couple transaction failures and a read failure (&quot;Failure to delete ovsdbNode&quot;) around 2016-09-09 18:15:15 with message &quot;Metadata not available&quot; which indicates it tried to delete/read a node that doesn&apos;t exist.&lt;/p&gt;

&lt;p&gt;I don&apos;t see any blueprint errors on ODL2 but any blueprint issues would occur on startup.&lt;/p&gt;

&lt;p&gt;The RpcRegistry warning means a client tried to send a routed RPC but there&apos;s no implementation registered for the routed node path. That could be b/c there was a prior registration but was unregistered or there never was/is a registration or there is a registration on a remote controller node but hasn&apos;t propagated to the calling controller node yet. However there is a 5 sec wait for convergence hence the &quot;Timed out finding routers&quot; message. &lt;/p&gt;

&lt;p&gt;Wrt the EOS, member-2 became the shard leader originally and transferred leadership to member-3 when it was shut down at 2016-09-09 20:47:54. Since both member-1 and member-2 had been shut down and restarted, I would expect member-3 to be the owner for the entities. Interestingly, 2016-09-09 20:13:01,009 is the last timestamp in the logs for ODL3.&lt;/p&gt;

&lt;p&gt;I&apos;m unclear as to the exact issue observed as I&apos;m not familiar with ovsdb. I&apos;m also unclear as to whether there&apos;s any issue with clustering here. As I mentioned the EOS behavior looks correct but w/o EOS debug enabled I can&apos;t tell for sure.&lt;/p&gt;

&lt;p&gt;The NPE&apos;s mentioned above could be significant. I would suggest having ovsdb folks take a look.&lt;/p&gt;</comment>
                            <comment id="51509" author="tpantelis" created="Fri, 16 Sep 2016 15:04:46 +0000"  >&lt;p&gt;From the logs, the sequence of restarts is:&lt;/p&gt;

&lt;p&gt;2016-09-09 18:13 - all nodes up&lt;br/&gt;
2016-09-09 18:41 - ODL1 stopped&lt;br/&gt;
2016-09-09 20:38 - ODL1 restarted&lt;br/&gt;
2016-09-09 20:47 - ODL2 stopped&lt;br/&gt;
2016-09-09 21:15 - ODL2 restarted&lt;/p&gt;</comment>
                            <comment id="51510" author="ananthip@hcl.com" created="Mon, 19 Sep 2016 13:56:18 +0000"  >&lt;p&gt;Followed the steps mentioned above. &lt;br/&gt;
Observed the karaf logs, but couldn&apos;t find the above warn messages.&lt;br/&gt;
Thus, the bug is not reproduced.&lt;/p&gt;</comment>
                            <comment id="51511" author="tpantelis" created="Tue, 11 Oct 2016 20:53:24 +0000"  >&lt;p&gt;Closing this as it&apos;s not reproducible and whatever issue there was doesn&apos;t seem to be related to clustering based on analysis of the logs. As mentioned before, the NPE&apos;s emanating from ovsdb look ominous and may have been the cause so I would suggest creating a bug in that project.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="13587" name="odl1_log.tgz" size="121507" author="gvrangan" created="Fri, 9 Sep 2016 22:14:10 +0000"/>
                            <attachment id="13588" name="odl2_log.tgz" size="144112" author="gvrangan" created="Fri, 9 Sep 2016 22:14:31 +0000"/>
                            <attachment id="13589" name="odl3_log.tgz" size="99431" author="gvrangan" created="Fri, 9 Sep 2016 22:15:50 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                            <customfield id="customfield_11400" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10208" key="com.atlassian.jira.plugin.system.customfieldtypes:textfield">
                        <customfieldname>External issue ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6686</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10201" key="com.atlassian.jira.plugin.system.customfieldtypes:url">
                        <customfieldname>External issue URL</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[https://bugs.opendaylight.org/show_bug.cgi?id=6686]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10000" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i02r8n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>