<!-- 
RSS generated by JIRA (8.20.10#820010-sha1:ace47f9899e9ee25d7157d59aa17ab06aee30d3d) at Wed Feb 07 19:56:21 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>OpenDaylight JIRA</title>
    <link>https://jira.opendaylight.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>8.20.10</version>
        <build-number>820010</build-number>
        <build-date>22-06-2022</build-date>
    </build-info>


<item>
            <title>[CONTROLLER-1746] OOM with large number of closed transactions</title>
                <link>https://jira.opendaylight.org/browse/CONTROLLER-1746</link>
                <project id="10113" key="CONTROLLER">controller</project>
                    <description>&lt;p&gt;On a single-node Carbon SR1 setup, we got an OOM over the weekend. Looking at the heap dump shows 1.1GB occupied in the frontend history&apos;s closed transactions map, with 33M entries.&lt;/p&gt;

&lt;p&gt;I&apos;ll upload the heap dump and add a link to it.&lt;/p&gt;</description>
                <environment>&lt;p&gt;Operating System: All&lt;br/&gt;
Platform: All&lt;/p&gt;</environment>
        <key id="26300">CONTROLLER-1746</key>
            <summary>OOM with large number of closed transactions</summary>
                <type id="10104" iconUrl="https://jira.opendaylight.org/secure/viewavatar?size=xsmall&amp;avatarId=10303&amp;avatarType=issuetype">Bug</type>
                                                <status id="5" iconUrl="https://jira.opendaylight.org/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10000">Done</resolution>
                                        <assignee username="rovarga">Robert Varga</assignee>
                                    <reporter username="skitt">Stephen Kitt</reporter>
                        <labels>
                    </labels>
                <created>Mon, 7 Aug 2017 13:45:19 +0000</created>
                <updated>Thu, 11 Oct 2018 11:04:06 +0000</updated>
                            <resolved>Tue, 2 Oct 2018 18:31:25 +0000</resolved>
                                    <version>Carbon</version>
                                    <fixVersion>Neon</fixVersion>
                    <fixVersion>Fluorine SR1</fixVersion>
                    <fixVersion>Oxygen SR4</fixVersion>
                                    <component>mdsal</component>
                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                                                                <comments>
                            <comment id="52531" author="skitt@redhat.com" created="Mon, 7 Aug 2017 15:04:49 +0000"  >&lt;p&gt;The heap dump is on &lt;a href=&quot;https://www.sk2.org/java_pid2098.hprof.xz&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://www.sk2.org/java_pid2098.hprof.xz&lt;/a&gt; (269MB).&lt;/p&gt;</comment>
                            <comment id="52532" author="rovarga" created="Mon, 7 Aug 2017 15:49:20 +0000"  >&lt;p&gt;Affects both Carbon and Nitrogen. Blocking as this is a regression.&lt;/p&gt;

&lt;p&gt;Confirmed by analyzing the dump reported in &lt;a href=&quot;https://git.opendaylight.org/gerrit/#/c/61034/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/#/c/61034/&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="52533" author="rovarga" created="Mon, 7 Aug 2017 16:15:27 +0000"  >&lt;p&gt;This is ask-based protocol and we are looking at transactions which have been closed but not purged. Tell-based protocol issues explicit purges from the frontend, for ask-based protocol this has to be done on the backend.&lt;/p&gt;

&lt;p&gt;Examining the data, these are free-standing transactions, which have been committed &amp;#8211; hence we are missing the purge step.&lt;/p&gt;</comment>
                            <comment id="52534" author="rovarga" created="Mon, 7 Aug 2017 16:19:24 +0000"  >&lt;p&gt;master: &lt;a href=&quot;https://git.opendaylight.org/gerrit/61284&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/61284&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52535" author="vorburger" created="Tue, 8 Aug 2017 13:04:34 +0000"  >&lt;p&gt;skitt: it&#8217;s a Carbon SR1 regression so anything older is something else&lt;/p&gt;

&lt;p&gt;skitt: it&#8217;s easy to spot, Eclipse MAT tells you the biggest leak culprit is a huge completedTransactions HashMap$Node[]&lt;/p&gt;</comment>
                            <comment id="52536" author="skitt@redhat.com" created="Tue, 8 Aug 2017 13:18:20 +0000"  >&lt;p&gt;(In reply to Michael Vorburger from comment #5)&lt;br/&gt;
&amp;gt; skitt: it&#8217;s a Carbon SR1 regression so anything older is something else&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; skitt: it&#8217;s easy to spot, Eclipse MAT tells you the biggest leak culprit is&lt;br/&gt;
&amp;gt; a huge completedTransactions HashMap$Node[]&lt;/p&gt;

&lt;p&gt;closeTransactions, not completeTransactions&lt;/p&gt;</comment>
                            <comment id="52537" author="skitt@redhat.com" created="Tue, 8 Aug 2017 13:18:58 +0000"  >&lt;p&gt;(In reply to Stephen Kitt from comment #6)&lt;br/&gt;
&amp;gt; (In reply to Michael Vorburger from comment #5)&lt;br/&gt;
&amp;gt; &amp;gt; skitt: it&#8217;s a Carbon SR1 regression so anything older is something else&lt;br/&gt;
&amp;gt; &amp;gt; &lt;br/&gt;
&amp;gt; &amp;gt; skitt: it&#8217;s easy to spot, Eclipse MAT tells you the biggest leak culprit is&lt;br/&gt;
&amp;gt; &amp;gt; a huge completedTransactions HashMap$Node[]&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; closeTransactions, not completeTransactions&lt;/p&gt;

&lt;p&gt;close*d*Transactions, not complete*d*Transactions&lt;/p&gt;</comment>
                            <comment id="52538" author="vorburger" created="Tue, 8 Aug 2017 13:45:20 +0000"  >&lt;p&gt;&amp;gt; close*d*Transactions, not complete*d*Transactions&lt;/p&gt;

&lt;p&gt;closedTransactions &amp;lt;= Dear Bugzilla indexer, please dig this.. &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.opendaylight.org/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;  &lt;span class=&quot;error&quot;&gt;&amp;#91;We will not find this bug in the future when searching for &amp;quot;closedTransactions&amp;quot; without this.&amp;#93;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="52539" author="rovarga" created="Wed, 9 Aug 2017 20:47:09 +0000"  >&lt;p&gt;carbon: &lt;a href=&quot;https://git.opendaylight.org/gerrit/61433&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/61433&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Awaiting confirmation the fix works.&lt;/p&gt;</comment>
                            <comment id="52540" author="skitt@redhat.com" created="Fri, 11 Aug 2017 11:45:57 +0000"  >&lt;p&gt;The fix looks OK, I&#8217;m seeing transactions being purged properly.&lt;/p&gt;</comment>
                            <comment id="52541" author="anipbu" created="Mon, 14 Aug 2017 17:54:15 +0000"  >&lt;p&gt;AR builds are failing on carbon, and only one patch has been merged recently.  Please confirm if this change could have potentially caused an issue:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://lists.opendaylight.org/pipermail/release/2017-August/011971.html&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://lists.opendaylight.org/pipermail/release/2017-August/011971.html&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="52542" author="rovarga" created="Mon, 14 Aug 2017 19:16:17 +0000"  >&lt;p&gt;Yes, as the change introduces asynchronous variance to production code under normal operation. This introduces a time window of ~100ms, where UT code can observe a transient state.&lt;/p&gt;

&lt;p&gt;As far as we can tell the issue lies solely with the UT suite, which needs to be fixed to account for the variance in behavior between autorelease VM, verify/merge VMs and developer environment.&lt;/p&gt;

&lt;p&gt;Of the two issues seen in autorelease, one already has a fix, &lt;a href=&quot;https://git.opendaylight.org/gerrit/61626&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/61626&lt;/a&gt;, which has been reviewed. The other failure is under investigation.&lt;/p&gt;</comment>
                            <comment id="52543" author="rovarga" created="Mon, 14 Aug 2017 19:17:32 +0000"  >&lt;p&gt;By no means can we revert the fix: it fixes a long-term stability regression we introduced between Carbon and Carbon SR1. It has been reported from the field.&lt;/p&gt;</comment>
                            <comment id="52544" author="rovarga" created="Mon, 14 Aug 2017 21:16:52 +0000"  >&lt;p&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/61720&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/61720&lt;/a&gt; should fix the second UT failure&lt;/p&gt;</comment>
                            <comment id="64775" author="zhfinder" created="Mon, 27 Aug 2018 07:00:32 +0000"  >&lt;p&gt;I apologize for accidentally reopening this bug while reviewing it. I hope this did not cause any trouble.&lt;/p&gt;</comment>
                            <comment id="65002" author="jamescch" created="Thu, 20 Sep 2018 08:34:40 +0000"  >&lt;p&gt;This bug wasn&apos;t fixed. Still seeing a huge amount of closedTransactions in Oxygen-sr3.&lt;/p&gt;

&lt;p&gt;The code in FrontendHistoryMetadataBuilder:&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
void onTransactionCommitted(&lt;span class=&quot;code-keyword&quot;&gt;final&lt;/span&gt; TransactionIdentifier txId) {                         
    closedTransactions.put(UnsignedLong.fromLongBits(txId.getTransactionId()), &lt;span class=&quot;code-object&quot;&gt;Boolean&lt;/span&gt;.TRUE);
}
void onTransactionPurged(&lt;span class=&quot;code-keyword&quot;&gt;final&lt;/span&gt; TransactionIdentifier txId) { 
    &lt;span class=&quot;code-keyword&quot;&gt;final&lt;/span&gt; UnsignedLong id = UnsignedLong.fromLongBits(txId.getTransactionId());
    closedTransactions.remove(id);
    purgedTransactions.add(Range.closedOpen(id, UnsignedLong.ONE.plus(id))); 
}
 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;closedTransaction&#160;will be added on transaction committed and removed on transaction purged.&lt;/p&gt;

&lt;p&gt;But it seems that these two operations are being executed out of order.&lt;/p&gt;

&lt;p&gt;log:set debug&#160;org.opendaylight.controller.cluster.datastore.FrontendClientMetadataBuilder&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
2018-09-20 16:17:50,466 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-7-txn-58-0
2018-09-20 16:17:50,466 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-7-txn-58-0
2018-09-20 16:17:50,483 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-7-txn-59-0
2018-09-20 16:17:50,486 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-7-txn-59-0
2018-09-20 16:17:50,496 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-6-txn-90-0
2018-09-20 16:17:50,497 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-6-txn-90-0
2018-09-20 16:17:50,498 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-6-txn-91-0
2018-09-20 16:17:50,498 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-6-txn-91-0
2018-09-20 16:17:50,514 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-7-txn-60-0
2018-09-20 16:17:50,519 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-7-txn-60-0
2018-09-20 16:17:50,520 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-7-txn-61-0
2018-09-20 16:17:50,520 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-7-txn-61-0
2018-09-20 16:17:50,577 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Purged transaction member-1-datastore-operational-fe-0-chn-5-txn-85-0
2018-09-20 16:17:50,577 | DEBUG | dispatcher-24 | lientMetadataBuilder | member-1-shard-inventory-operational: Committed transaction member-1-datastore-operational-fe-0-chn-5-txn-85-0

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Purged is always executed before committed for the same transaction. So the closed transactions keep being added back.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="65104" author="rovarga" created="Mon, 1 Oct 2018 12:09:28 +0000"  >&lt;p&gt;I think we need frontend debugs as well to understand what is going on. Frontend should not be issuing a purge before it sees the transaction committed.&lt;/p&gt;</comment>
                            <comment id="65154" author="rovarga" created="Tue, 2 Oct 2018 14:21:54 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.opendaylight.org/secure/ViewProfile.jspa?name=jamescch&quot; class=&quot;user-hover&quot; rel=&quot;jamescch&quot;&gt;jamescch&lt;/a&gt; is this reproducible with the configuration data store?&lt;/p&gt;</comment>
                            <comment id="65155" author="rovarga" created="Tue, 2 Oct 2018 14:29:40 +0000"  >&lt;p&gt;So this is a single node, with operational store. Unless this is reproducible with three nodes or with configuration data store, it would seem to indicate that sal-akka-raft is playing tricks by invoking us back.&lt;/p&gt;</comment>
                            <comment id="65159" author="rovarga" created="Tue, 2 Oct 2018 15:05:03 +0000"  >&lt;p&gt;Actually this is a problem with ShardDataTree.applyReplicatedPayload(), where we go down through payloadReplicationComplete(), but invoke allMetadataCommittedTransaction() only once we return.&lt;/p&gt;

&lt;p&gt;This works with the Shard.replicatePayload() shortcut to subvert us: payloadReplicationComplete() will apply the state, but will also run through completion callbacks, which will issue a purge payload replication, which will bounce on stack too &#8211; hence it will completely execute before we get to invoking allMetadataCommittedTransaction(). Thus the mis-ordering will happen. Replication/persistence will thwart the replicatePayload() shortcut, hence we are good with three nodes or persistence.&lt;/p&gt;</comment>
                            <comment id="65319" author="jamescch" created="Thu, 11 Oct 2018 11:04:06 +0000"  >&lt;p&gt;Great! The fix works well. The order looks correct now.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                            <customfield id="customfield_11400" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10208" key="com.atlassian.jira.plugin.system.customfieldtypes:textfield">
                        <customfieldname>External issue ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>8941</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10201" key="com.atlassian.jira.plugin.system.customfieldtypes:url">
                        <customfieldname>External issue URL</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[https://bugs.opendaylight.org/show_bug.cgi?id=8941]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10206" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Issue Type</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10300"><![CDATA[Bug]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10204" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>ODL SR Target Milestone</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10337"><![CDATA[Carbon-SR2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10000" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i02sh3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>