<!-- 
RSS generated by JIRA (8.20.10#820010-sha1:ace47f9899e9ee25d7157d59aa17ab06aee30d3d) at Wed Feb 07 19:55:56 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>OpenDaylight JIRA</title>
    <link>https://jira.opendaylight.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>8.20.10</version>
        <build-number>820010</build-number>
        <build-date>22-06-2022</build-date>
    </build-info>


<item>
            <title>[CONTROLLER-1593] C: delete ds benchmark regression</title>
                <link>https://jira.opendaylight.org/browse/CONTROLLER-1593</link>
                <project id="10113" key="CONTROLLER">controller</project>
                    <description>&lt;p&gt;Based on the graph &lt;a href=&quot;https://jenkins.opendaylight.org/releng/view/controller/job/controller-csit-3node-periodic-benchmark-all-carbon/plot/getPlot?index=6&amp;amp;width=900&amp;amp;height=900&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jenkins.opendaylight.org/releng/view/controller/job/controller-csit-3node-periodic-benchmark-all-carbon/plot/getPlot?index=6&amp;amp;width=900&amp;amp;height=900&lt;/a&gt;   there is regression in DELETE operation.&lt;/p&gt;

&lt;p&gt;The comparison is done for builds #130 and #170&lt;br/&gt;
&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/controller-csit-3node-periodic-benchmark-only-carbon/130/archives/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/controller-csit-3node-periodic-benchmark-only-carbon/130/archives/&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://logs.opendaylight.org/releng/jenkins092/controller-csit-3node-periodic-benchmark-only-carbon/170/archives/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://logs.opendaylight.org/releng/jenkins092/controller-csit-3node-periodic-benchmark-only-carbon/170/archives/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The log files can be taken from the links above.&lt;br/&gt;
Log files odl1_karaf.log.gz and odl2_karaf.log.gz are huge after unzipping and are full of lines like&lt;br/&gt;
2017-02-02 12:54:11,425 | WARN  | tp1150926568-510 | SimpletxDomRead                  | 294 - org.opendaylight.controller.dsbenchmark - 1.3.0.SNAPSHOT | optionalDataObject is either null or .isPresent is false&lt;/p&gt;


&lt;p&gt;The regression happened only for config datastore (both leader and follower), operational was not affected.&lt;/p&gt;</description>
                <environment>&lt;p&gt;Operating System: All&lt;br/&gt;
Platform: All&lt;/p&gt;</environment>
        <key id="26147">CONTROLLER-1593</key>
            <summary>C: delete ds benchmark regression</summary>
                <type id="10104" iconUrl="https://jira.opendaylight.org/secure/viewavatar?size=xsmall&amp;avatarId=10303&amp;avatarType=issuetype">Bug</type>
                                                <status id="5" iconUrl="https://jira.opendaylight.org/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10000">Done</resolution>
                                        <assignee username="-1">Unassigned</assignee>
                                    <reporter username="pgubka@cisco.com">Peter Gubka</reporter>
                        <labels>
                    </labels>
                <created>Tue, 7 Feb 2017 05:29:13 +0000</created>
                <updated>Tue, 25 Jul 2023 08:24:21 +0000</updated>
                            <resolved>Fri, 21 Apr 2017 09:42:55 +0000</resolved>
                                                                    <component>clustering</component>
                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                                                                <comments>
                            <comment id="51744" author="pgubka@cisco.com" created="Tue, 7 Feb 2017 05:32:24 +0000"  >&lt;p&gt;It may be related to an older issue &lt;a href=&quot;https://bugs.opendaylight.org/show_bug.cgi?id=7390&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugs.opendaylight.org/show_bug.cgi?id=7390&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="51745" author="pgubka@cisco.com" created="Tue, 7 Feb 2017 09:30:22 +0000"  >&lt;p&gt;Affected is only config ds for both leader and follower. Times are in milliseconds and are taken from builds #130 and #170&lt;/p&gt;

&lt;p&gt;#130          #170&lt;br/&gt;
61.082625     977.885625    FOL_CONFIG-BINDING-AWARE-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
47.062875     935.484125    FOL_CONFIG-BINDING-AWARE-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
33.681125     979.43        FOL_CONFIG-BINDING-INDEPENDENT-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
41.708875     977.866625    FOL_CONFIG-BINDING-INDEPENDENT-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
86.716        703.5745      CONFIG-BINDING-AWARE-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
92.4615       707.398375    CONFIG-BINDING-AWARE-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
84.083875     676.697125    CONFIG-BINDING-INDEPENDENT-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
92.229        700.476       CONFIG-BINDING-INDEPENDENT-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 8.877125       2.464       FOL_OPERATIONAL-BINDING-AWARE-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 8.02175        4.815375    FOL_OPERATIONAL-BINDING-AWARE-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 2.282          2.27075     FOL_OPERATIONAL-BINDING-INDEPENDENT-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 7.426875       4.394625    FOL_OPERATIONAL-BINDING-INDEPENDENT-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 0.612625       1.598125    OPERATIONAL-BINDING-AWARE-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 1.25825        1.911375    OPERATIONAL-BINDING-AWARE-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 0.535875       2.062625    OPERATIONAL-BINDING-INDEPENDENT-SIMPLE-TX-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;br/&gt;
 0.86975        2.579875    OPERATIONAL-BINDING-INDEPENDENT-TX-CHAINING-DELETE-1/100000OUTER/INNER-1OP-EXEC&lt;/p&gt;</comment>
                            <comment id="51746" author="rovarga" created="Thu, 13 Apr 2017 12:04:56 +0000"  >&lt;p&gt;The graph shows around 6x regression in delete performance on January 9th. This is not reflected in Boron, so it is caused by something we did in Carbon around that time frame.&lt;/p&gt;

&lt;p&gt;Since this is config data store, there are two possible causes:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;a change in yangtools to DataTree which makes deletes less efficient&lt;/li&gt;
	&lt;li&gt;a change in controller around persistence&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;I did not find anything in yangtools. There are three candidates in controller:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/49466&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/49466&lt;/a&gt;, which I think was reverted&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/49404&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/49404&lt;/a&gt;, which I find unlikely&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/49971&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/49971&lt;/a&gt;, which does not quite fit the timeline&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Is there a way to narrow down the window in which the regression occurred?&lt;/p&gt;</comment>
                            <comment id="51747" author="rovarga" created="Thu, 20 Apr 2017 11:25:02 +0000"  >&lt;p&gt;So far the best candidate for causing the regression is &lt;a href=&quot;https://git.opendaylight.org/gerrit/49404&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/49404&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="51748" author="rovarga" created="Thu, 20 Apr 2017 12:04:42 +0000"  >&lt;p&gt;That is based on the fact that the regression occurred somewhere before builds #133 and #143.&lt;/p&gt;

&lt;p&gt;The patch introduced a change in behaviour &amp;#8211; we are now performing DataTree operations and invoking listeners on the follower before responding to the shard leader, which I think is what limits delete performance here as AppendEntry consensus is built up more slowly than before and is actually affected by listener code execution &amp;#8211; notably by DataChangeListener, which are not off-loaded and are expensive to compute.&lt;/p&gt;</comment>
                            <comment id="51749" author="tpantelis" created="Thu, 20 Apr 2017 13:01:09 +0000"  >&lt;p&gt;(In reply to Robert Varga from comment #5)&lt;br/&gt;
&amp;gt; That is based on the fact that the regression occurred somewhere before&lt;br/&gt;
&amp;gt; builds #133 and #143.&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; The patch introduced a change in behaviour &amp;#8211; we are now performing DataTree&lt;br/&gt;
&amp;gt; operations and invoking listeners on the follower before responding to the&lt;br/&gt;
&amp;gt; shard leader, which I think is what limits delete performance here as&lt;br/&gt;
&amp;gt; AppendEntry consensus is built up more slowly than before and is actually&lt;br/&gt;
&amp;gt; affected by listener code execution &amp;#8211; notably by DataChangeListener, which&lt;br/&gt;
&amp;gt; are not off-loaded and are expensive to compute.&lt;/p&gt;

&lt;p&gt;That patch fixed an inconsistency issue between lastAppliedIndex and what is actually applied to the data tree for snapshots. &lt;/p&gt;

&lt;p&gt;ApplyState actually occurs after consensus as the leader has to first get consensus and update the commitIndex before updating lastAppliedIndex. However an AppendEntries could append new entry/entries for consensus and also apply previous entries. &lt;/p&gt;

&lt;p&gt;In the end, the state will still eventually apply and block subsequent AppendEntries but replying before it applies the previous state, as you mentioned, should get us back to the previous behavior and allow the leader to proceed with consensus faster.&lt;/p&gt;

&lt;p&gt;DataChangeListener notification generation is offloaded on a separate actor but there is a 10 ms mailbox-push-timeout-time which can block the shard actor. But if we&apos;re hitting that then we&apos;re reaching the 5000 mailbox-capacity and thus are in danger of dropping notifications. They would go to dead letters - hopefully that&apos;s not occurring.&lt;/p&gt;</comment>
                            <comment id="51750" author="rovarga" created="Thu, 20 Apr 2017 13:22:32 +0000"  >&lt;p&gt;Do both DCL and DTCL have dedicated actors? Also, for DCL determining whom to notify (i.e. while the registration tree is locked) is actually expensive, if I remember correctly...&lt;/p&gt;</comment>
                            <comment id="51751" author="tpantelis" created="Thu, 20 Apr 2017 13:30:22 +0000"  >&lt;p&gt;Submitted &lt;a href=&quot;https://git.opendaylight.org/gerrit/#/c/55733/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/#/c/55733/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="51752" author="tpantelis" created="Thu, 20 Apr 2017 13:33:36 +0000"  >&lt;p&gt;(In reply to Robert Varga from comment #7)&lt;br/&gt;
&amp;gt; Do both DCL and DTCL have dedicated actors? Also, for DCL determining whom&lt;br/&gt;
&amp;gt; to notify (i.e. while the registration tree is locked) is actually&lt;br/&gt;
&amp;gt; expensive, if I remember correctly...&lt;/p&gt;

&lt;p&gt;yes - that&apos;s all done on a separate actor. Also the registration tree lock previously could block the shard on listener registration but that was recently changed to add new listeners on the actor as well.&lt;/p&gt;</comment>
                            <comment id="51753" author="rovarga" created="Thu, 20 Apr 2017 13:49:41 +0000"  >&lt;p&gt;I guess the question is when do we consider the entry as applied. There are essentially five choices:&lt;/p&gt;

&lt;p&gt;1) when we have validated the AppendEntries&lt;br/&gt;
2) when we have updated the log indices&lt;br/&gt;
3) when we have applied the entries to DataTree&lt;br/&gt;
4) when we have queued up notifications&lt;br/&gt;
5) when notifications have been delivered&lt;/p&gt;

&lt;p&gt;Previously we have done 1), currently we are doing 4), your patch reverts that to 1) (I think).&lt;/p&gt;

&lt;p&gt;I think we want to go at least as far as 3), as at that point our internal journal records match what we have told the leader and DataTree actually reflects those changes.&lt;/p&gt;

&lt;p&gt;I wonder what will happen if we send the response to the leader and we fail due to an exception before we finish processing the message...&lt;/p&gt;</comment>
                            <comment id="51754" author="rovarga" created="Thu, 20 Apr 2017 13:54:36 +0000"  >&lt;p&gt;Ah, right, both notifications are offloaded since that recent patch... so I guess we&apos;ll give it a day or two to see if the performance has changed &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.opendaylight.org/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="51755" author="tpantelis" created="Thu, 20 Apr 2017 19:45:44 +0000"  >&lt;p&gt;(In reply to Robert Varga from comment #10)&lt;br/&gt;
&amp;gt; I guess the question is when do we consider the entry as applied. There are&lt;br/&gt;
&amp;gt; essentially five choices:&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; 1) when we have validated the AppendEntries&lt;br/&gt;
&amp;gt; 2) when we have updated the log indices&lt;br/&gt;
&amp;gt; 3) when we have applied the entries to DataTree&lt;br/&gt;
&amp;gt; 4) when we have queued up notifications&lt;br/&gt;
&amp;gt; 5) when notifications have been delivered&lt;br/&gt;
&amp;gt; &lt;br/&gt;
&amp;gt; Previously we have done 1), currently we are doing 4), your patch reverts&lt;br/&gt;
&amp;gt; that to 1) (I think).&lt;/p&gt;

&lt;p&gt;My new patch essentially reverts it back to the behavior prior to &lt;a href=&quot;https://git.opendaylight.org/gerrit/49404&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/49404&lt;/a&gt;, in terms of when the reply is sent wrt to applying previous state. Prior, handleAppendEntries updated lastAppliedIndex and queued an ApplyState message to apply to the data tree, then replied to the leader. So handleAppendEntries always returned before applying the previous state. However this resulted in an inconsistency in the persisted state between lastAppliedIndex and the actual applied state in the snapshot if one was triggered immediately after but before ApplyState was processed. Patch 49404 fixed that by making the 2 atomic. This new patch merely reverts it back to the behavior of handleAppendEntries replying before the previous state is applied.&lt;/p&gt;

&lt;p&gt;&amp;gt; &lt;br/&gt;
&amp;gt; I think we want to go at least as far as 3), as at that point our internal&lt;br/&gt;
&amp;gt; journal records match what we have told the leader and DataTree actually&lt;br/&gt;
&amp;gt; reflects those changes.&lt;/p&gt;

&lt;p&gt;As far as the leader is concerned when sending new entries, it&apos;s only interested in as far as 2). Remember, normally it takes 2 AppendEntries to get an entry applied to the state. The first is sent with the new entry to append to the journal and the second is sent with the updated commit index after the first one responds and consensus is reached. The second AppendEntries may contain subsequent new entries and prior ones may be applied to the state as well as a side effect but the leader is not concerned with the latter and we don&apos;t need to block the reply. &lt;/p&gt;

&lt;p&gt;&amp;gt; &lt;br/&gt;
&amp;gt; I wonder what will happen if we send the response to the leader and we fail&lt;br/&gt;
&amp;gt; due to an exception before we finish processing the message...&lt;/p&gt;

&lt;p&gt;It doesn&apos;t matter - either way we respond to the leader. We don&apos;t report apply state failures to the leader. The leader is only concerned that entries were appended to the journal and persisted. Raft doesn&apos;t define apply state failures wrt the leader.&lt;/p&gt;</comment>
                            <comment id="51756" author="tcere" created="Fri, 21 Apr 2017 09:42:55 +0000"  >&lt;p&gt;Tested locally, delete perf seems to be back on boron numbers.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                            <customfield id="customfield_11400" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10208" key="com.atlassian.jira.plugin.system.customfieldtypes:textfield">
                        <customfieldname>External issue ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7747</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10201" key="com.atlassian.jira.plugin.system.customfieldtypes:url">
                        <customfieldname>External issue URL</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue><![CDATA[https://bugs.opendaylight.org/show_bug.cgi?id=7747]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10206" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Issue Type</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10300"><![CDATA[Bug]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10000" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i02rj3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>