<!-- 
RSS generated by JIRA (8.20.10#820010-sha1:ace47f9899e9ee25d7157d59aa17ab06aee30d3d) at Wed Feb 07 19:54:37 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>OpenDaylight JIRA</title>
    <link>https://jira.opendaylight.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>8.20.10</version>
        <build-number>820010</build-number>
        <build-date>22-06-2022</build-date>
    </build-info>


<item>
            <title>[CONTROLLER-1072] Clustering: akka.pattern.AskTimeoutException when sending large amounts of BGP data to EXABGP</title>
                <link>https://jira.opendaylight.org/browse/CONTROLLER-1072</link>
                <project id="10113" key="CONTROLLER">controller</project>
                    <description>&lt;p&gt;The following exception occurs when I try to send 100000 routes through BGP into any clustered setup. Tested on &lt;a href=&quot;https://jenkins.opendaylight.org/integration/view/Integration%20jobs/job/integration-master-project-centralized-integration/lastSuccessfulBuild/artifact/distributions/extra/karaf/target/distribution-karaf-0.3.0-SNAPSHOT.tar.gz:&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jenkins.opendaylight.org/integration/view/Integration%20jobs/job/integration-master-project-centralized-integration/lastSuccessfulBuild/artifact/distributions/extra/karaf/target/distribution-karaf-0.3.0-SNAPSHOT.tar.gz:&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;akka.pattern.AskTimeoutException: Ask timed out on &lt;a href=&quot;#1797666910)]&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;ActorSelection[Anchor(akka://opendaylight-cluster-data/), Path(/user/shardmanager-operational/member-1-shard-default-operational/shard-member-1-txn-88#1797666910)]&lt;/a&gt; after &lt;span class=&quot;error&quot;&gt;&amp;#91;5000 ms&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at akka.pattern.PromiseActorRef$$anonfun$1.apply$mcV$sp(AskSupport.scala:333)&lt;span class=&quot;error&quot;&gt;&amp;#91;312:com.typesafe.akka.actor:2.3.4&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at akka.actor.Scheduler$$anon$7.run(Scheduler.scala:117)&lt;span class=&quot;error&quot;&gt;&amp;#91;312:com.typesafe.akka.actor:2.3.4&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at scala.concurrent.Future$InternalCallbackExecutor$.scala$concurrent$Future$InternalCallbackExecutor$$unbatchedExecute(Future.scala:694)[309:org.scala-lang.scala-library:2.10.4.v20140209-180020-VFINAL-b66a39653b]&lt;br/&gt;
    at scala.concurrent.Future$InternalCallbackExecutor$.execute(Future.scala:691)&lt;span class=&quot;error&quot;&gt;&amp;#91;309:org.scala-lang.scala-library:2.10.4.v20140209-180020-VFINAL-b66a39653b&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at akka.actor.LightArrayRevolverScheduler$TaskHolder.executeTask(Scheduler.scala:467)&lt;span class=&quot;error&quot;&gt;&amp;#91;312:com.typesafe.akka.actor:2.3.4&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at akka.actor.LightArrayRevolverScheduler$$anon$8.executeBucket$1(Scheduler.scala:419)&lt;span class=&quot;error&quot;&gt;&amp;#91;312:com.typesafe.akka.actor:2.3.4&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at akka.actor.LightArrayRevolverScheduler$$anon$8.nextTick(Scheduler.scala:423)&lt;span class=&quot;error&quot;&gt;&amp;#91;312:com.typesafe.akka.actor:2.3.4&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at akka.actor.LightArrayRevolverScheduler$$anon$8.run(Scheduler.scala:375)&lt;span class=&quot;error&quot;&gt;&amp;#91;312:com.typesafe.akka.actor:2.3.4&amp;#93;&lt;/span&gt;&lt;br/&gt;
    at java.lang.Thread.run(Unknown Source)&lt;span class=&quot;error&quot;&gt;&amp;#91;:1.7.0_67&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;This problem does not occur when I use a non-clustered setup.&lt;/p&gt;</description>
                <environment>&lt;p&gt;Operating System: All&lt;br/&gt;
Platform: All&lt;/p&gt;</environment>
        <key id="25626">CONTROLLER-1072</key>
            <summary>Clustering: akka.pattern.AskTimeoutException when sending large amounts of BGP data to EXABGP</summary>
                <type id="10104" iconUrl="https://jira.opendaylight.org/secure/viewavatar?size=xsmall&amp;avatarId=10303&amp;avatarType=issuetype">Bug</type>
                                                <status id="5" iconUrl="https://jira.opendaylight.org/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10002">Duplicate</resolution>
                                        <assignee username="harmasin@cisco.com">Harman Singh</assignee>
                                    <reporter username="jbehran@cisco.com">Jozef Behran</reporter>
                        <labels>
                    </labels>
                <created>Tue, 16 Dec 2014 08:47:41 +0000</created>
                <updated>Thu, 4 Jun 2015 19:00:23 +0000</updated>
                            <resolved>Thu, 4 Jun 2015 19:00:23 +0000</resolved>
                                    <version>Post-Helium</version>
                                                    <component>mdsal</component>
                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                                                                <comments>
                            <comment id="49952" author="jbehran@cisco.com" created="Tue, 16 Dec 2014 09:27:50 +0000"  >&lt;p&gt;Additional information:&lt;/p&gt;

&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Topology provider was turned ON during all tests.&lt;/li&gt;
	&lt;li&gt;It is triggered even without topology (RIB only is enough).&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="49953" author="jbehran@cisco.com" created="Tue, 16 Dec 2014 18:29:49 +0000"  >&lt;p&gt;After long investigation I realized that only about 6000 routes will go through before these timeouts show up. And it does not matter whether I run the topology provider or not. At 7500 I start to get this AKKA timeout error.&lt;/p&gt;</comment>
                            <comment id="49954" author="jbehran@cisco.com" created="Wed, 17 Dec 2014 18:35:48 +0000"  >&lt;p&gt;To hit this bug configure EXABGP with 10000 routes and then point it to an ODL instance configured with clustering enabled. I advise to switch topology off as RIB is just enough to hit the bug. The attachment is an EXABGP configuration file with 15000 routes which is fairly large but not too large.&lt;/p&gt;</comment>
                            <comment id="49969" author="jbehran@cisco.com" created="Wed, 17 Dec 2014 18:35:48 +0000"  >&lt;p&gt;Attachment 15k.cfg.xz has been added with description: EXABGP configuration with 15000 routes&lt;/p&gt;</comment>
                            <comment id="49955" author="moraja@cisco.com" created="Wed, 17 Dec 2014 21:53:55 +0000"  >&lt;p&gt;How do we unzip this xz file ? tar -xzf does not work with it&lt;/p&gt;</comment>
                            <comment id="49956" author="moraja@cisco.com" created="Wed, 17 Dec 2014 22:16:45 +0000"  >&lt;p&gt;I had to download Ez7z on mac to unzip the xz attachment.&lt;/p&gt;</comment>
                            <comment id="49957" author="vrpolak" created="Thu, 18 Dec 2014 10:37:54 +0000"  >&lt;p&gt;(In reply to Moiz Raja from comment #4)&lt;br/&gt;
&amp;gt; How do we unzip this xz file ? tar -xzf does not work with it&lt;/p&gt;

&lt;p&gt;The &apos;z&apos; from -xzf is for .gz only. The letter for .xz is &apos;J&apos;.&lt;br/&gt;
Every linux-based tar program I have seen works well with just &quot;tar -xf&quot;, no matter whether the target is a .tar, .tar.gz (.tgz) or .tar.xz (.txz).&lt;/p&gt;</comment>
                            <comment id="49958" author="vrpolak" created="Thu, 18 Dec 2014 10:40:34 +0000"  >&lt;p&gt;(In reply to Vratko Pol&#225;k from comment #6)&lt;br/&gt;
&amp;gt; (In reply to Moiz Raja from comment #4)&lt;br/&gt;
&amp;gt; &amp;gt; How do we unzip this xz file ? tar -xzf does not work with it&lt;/p&gt;

&lt;p&gt;Oh, there was no .tar before .xz&lt;br/&gt;
In that case, &quot;unxz&quot; command is available in Linux distributions, usually in a package named xz-utils.&lt;/p&gt;</comment>
                            <comment id="49959" author="moraja@cisco.com" created="Wed, 14 Jan 2015 23:49:10 +0000"  >&lt;p&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/#/c/14155/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/#/c/14155/&lt;/a&gt; - master&lt;/p&gt;</comment>
                            <comment id="49960" author="jbehran@cisco.com" created="Tue, 3 Mar 2015 14:04:09 +0000"  >&lt;p&gt;The change increased resiliency against large batches of data but after trying to push 1 million prefixes I still get akka.TimeOutException. Or a &quot;dead letter encountered&quot; log record.&lt;/p&gt;

&lt;p&gt;After reviewing the fix I found a suspicious line (in TransactionProxy.java):&lt;/p&gt;

&lt;p&gt;this.operationLimiter = new Semaphore(actorContext.getTransactionOutstandingOperationLimit());&lt;/p&gt;

&lt;p&gt;I suspect the limit here is too large. It occurs to me that it does not give AKKA enough &quot;breathing space&quot; so when e.g. disk swapping occurs at a time when there is a bunch of huge transactions in the queue and another bunch waiting at the limiter to be transmitted, then AKKA starts getting timeouts.&lt;/p&gt;

&lt;p&gt;However consultation with other developers revealed that there are multiple timeouts and other buffering limits involved so I am not sure at all what is wrong here and/or how to fix it.&lt;/p&gt;

&lt;p&gt;Increasing the akka timeout to 900 seconds makes the problem go away.&lt;/p&gt;</comment>
                            <comment id="49961" author="moraja@cisco.com" created="Wed, 4 Mar 2015 23:22:51 +0000"  >&lt;p&gt;Jozef,&lt;/p&gt;

&lt;p&gt;Couple of questions,&lt;/p&gt;

&lt;p&gt;1. Which akka timeout did you increase to 900 seconds to fix this issue? Was it the operation timeout or was it the transaction timeout?&lt;/p&gt;

&lt;p&gt;2. How do we reproduce this problem with 1 Million prefixes?&lt;/p&gt;

&lt;p&gt;3. How much time does it take to ingest 1 Million prefixes?&lt;/p&gt;</comment>
                            <comment id="49962" author="jbehran@cisco.com" created="Mon, 9 Mar 2015 12:23:22 +0000"  >&lt;p&gt;After more testing I discovered that this exception occurs reliably when I try to push 2 million of prefixes into BGP. Changing the AKKA timeout does not affect anything in this case. The exception will occur after roughly 5 minutes even when I set the timeout to 15 minutes. Additionally I realized that the exception can occur roughly once in 5 tries even when running on 1 million.&lt;/p&gt;

&lt;p&gt;Answers:&lt;/p&gt;

&lt;p&gt;1. I increased &quot;operational-timeout-in-seconds&quot; in module &quot;distributed-operational-store-module&quot; and &quot;distributed-config-store-module&quot;. I have no idea how to set &quot;transaction timeout&quot; to anything because the config files here are different from what I could find by searching &quot;akka&quot; in Google. However I found some config file which mentions &quot;akka&quot; and &quot;timeout&quot; in one section, so I am going to take look on that.&lt;/p&gt;

&lt;p&gt;2. I need to build a testcase and it will take a while (the test I use right now uses a repackaged build with custom configuration, I need to extract the customizations etc). Once done I will attach it here.&lt;/p&gt;

&lt;p&gt;3. About 4 minutes with IMDS. When CDS does not fail, then it also takes about 4 minutes. When it fails, it may take up to 1 hour (while it generates multi-GB log file).&lt;/p&gt;</comment>
                            <comment id="49963" author="jbehran@cisco.com" created="Tue, 10 Mar 2015 13:20:51 +0000"  >&lt;p&gt;Steps to reproduce with 1M of routes:&lt;/p&gt;

&lt;p&gt;1. Extract ODL tarball into your home directory.&lt;br/&gt;
2. Enter the directory that was made by step 1 and run bin/karaf.&lt;br/&gt;
3. Enter &quot;feature:install odl-bgpcep-bgp-all&quot;.&lt;br/&gt;
4. Enter &quot;feature:install odl-restconf-noauth&quot;.&lt;br/&gt;
5. Install clustering according to your wishes (persistence, replication, etc).&lt;br/&gt;
6. Enter &quot;logout&quot; and wait until karaf exits.&lt;br/&gt;
7. Copy the file &quot;41-bgp-example.xml&quot; from the attached package into etc/opendaylight/karaf (overwrite the file with the same name that is there).&lt;br/&gt;
8. Run bin/karaf again and wait about 5 minutes for ODL to boot (you can use &quot;top&quot; to shorten the wait, watch for the CPU usage of the Java process to drop below about 10% and stay there).&lt;br/&gt;
9. In another terminal extract the play.py from the attached package and then run &quot;python play.py --gencount=1000000&quot;.&lt;/p&gt;

&lt;p&gt;Notes:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;The &quot;41-bgp-example.xml&quot; file from the package sets up topology updating. That makes it much more likely to hit the bug.&lt;/li&gt;
	&lt;li&gt;If you intend to change the content of &quot;41-bgp-example.xml&quot;, do a DIFF between the original file and the one in the package.&lt;/li&gt;
	&lt;li&gt;In the last step you can specify any count you want in the --gencount argument, up to 180 million.&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="49970" author="jbehran@cisco.com" created="Tue, 10 Mar 2015 13:20:51 +0000"  >&lt;p&gt;Attachment test.tar.gz has been added with description: Package with tools for testing ODL with up to 180 million prefixes&lt;/p&gt;</comment>
                            <comment id="49964" author="jbehran@cisco.com" created="Tue, 10 Mar 2015 13:30:33 +0000"  >&lt;p&gt;According to Vratko, here is a faster path to hit the bug:&lt;/p&gt;

&lt;p&gt;1. Extract ODL tarball into your home directory.&lt;br/&gt;
2. Enter the directory that was made by step 1.&lt;br/&gt;
3. Copy the file &quot;41-bgp-example.xml&quot; from the attached package into directory etc/opendaylight/karaf (create the directory if it does not exist).&lt;br/&gt;
4. Run bin/karaf&lt;br/&gt;
5. Enter &quot;feature:install odl-bgpcep-bgp-all&quot;.&lt;br/&gt;
6. Enter &quot;feature:install odl-restconf-noauth&quot;.&lt;br/&gt;
7. Install clustering according to your wishes (persistence, replication, etc).&lt;br/&gt;
8. In another terminal extract the play.py from the attached package and then run &quot;python play.py --gencount=1000000&quot;.&lt;/p&gt;

&lt;p&gt;Additionally, try 2 million routes if you have difficulty hitting the bug.&lt;/p&gt;</comment>
                            <comment id="49965" author="harmasin@cisco.com" created="Tue, 14 Apr 2015 01:10:08 +0000"  >&lt;p&gt;Hi josef,&lt;/p&gt;

&lt;p&gt;I could not make your instructions work. The play.py script fails with the following error when I try to use it:&lt;/p&gt;

&lt;p&gt;Traceback (most recent call last):&lt;br/&gt;
  File &quot;play.py&quot;, line 366, in &amp;lt;module&amp;gt;&lt;br/&gt;
    Main()&lt;br/&gt;
  File &quot;play.py&quot;, line 319, in Main&lt;br/&gt;
    FromODL,ToODL = ConnectToODL(args.myip, args.myport, args.peerip, args.peerport, CtlLog)&lt;br/&gt;
  File &quot;play.py&quot;, line 192, in ConnectToODL&lt;br/&gt;
    ODL.connect((peerip, int(peerport)))&lt;br/&gt;
  File &quot;/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py&quot;, line 224, in meth&lt;br/&gt;
    return getattr(self._sock,name)(*args)&lt;br/&gt;
socket.error: &lt;span class=&quot;error&quot;&gt;&amp;#91;Errno 60&amp;#93;&lt;/span&gt; Operation timed out&lt;/p&gt;

&lt;p&gt;Can you give me pointers what i need to do to reproduce it? I followed your comments written above.&lt;/p&gt;</comment>
                            <comment id="49966" author="jbehran@cisco.com" created="Thu, 30 Apr 2015 15:18:21 +0000"  >&lt;p&gt;(sorry for very late reply, I was overbooked during this month)&lt;/p&gt;

&lt;p&gt;1. Did you try to run the &quot;play.py&quot; command multiple times?&lt;br/&gt;
2. Did you try to restart ODL and try again? I sometimes experienced ODL to hang on installation of some feature, maybe in your case it got stuck around the code that handles the connection.&lt;br/&gt;
3. Do you have a link to the ODL build you are trying to test?&lt;/p&gt;

&lt;p&gt;Basically, the problem is that I could not reproduce this &quot;Operation timed out&quot; error on my setup.&lt;/p&gt;</comment>
                            <comment id="49967" author="moraja@cisco.com" created="Thu, 30 Apr 2015 15:23:40 +0000"  >&lt;p&gt;Jozef, in the meantime we did get over the problem of executing play.py. It works &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.opendaylight.org/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="49968" author="moraja@cisco.com" created="Thu, 4 Jun 2015 19:00:23 +0000"  >&lt;p&gt;Will track this issue as part of 3340 as it&apos;s newer and has more relevant info&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10002">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="25887">CONTROLLER-1333</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="13475" name="15k.cfg.xz" size="6244" author="jbehran@cisco.com" created="Wed, 17 Dec 2014 18:35:48 +0000"/>
                            <attachment id="13476" name="test.tar.gz" size="5121" author="jbehran@cisco.com" created="Tue, 10 Mar 2015 13:20:51 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                            <customfield id="customfield_11400" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10208" key="com.atlassian.jira.plugin.system.customfieldtypes:textfield">
                        <customfieldname>External issue ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>2518</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10201" key="com.atlassian.jira.plugin.system.customfieldtypes:url">
                        <customfieldname>External issue URL</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[https://bugs.opendaylight.org/show_bug.cgi?id=2518]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10206" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Issue Type</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10300"><![CDATA[Bug]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10204" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>ODL SR Target Milestone</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10315"><![CDATA[Lithium]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10202" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Priority</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10312"><![CDATA[High]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10000" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i02obb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>