<!-- 
RSS generated by JIRA (8.20.10#820010-sha1:ace47f9899e9ee25d7157d59aa17ab06aee30d3d) at Wed Feb 07 20:37:26 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>OpenDaylight JIRA</title>
    <link>https://jira.opendaylight.org</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>8.20.10</version>
        <build-number>820010</build-number>
        <build-date>22-06-2022</build-date>
    </build-info>


<item>
            <title>[RELENG-75] Heat scripts fail to bring nodes online</title>
                <link>https://jira.opendaylight.org/browse/RELENG-75</link>
                <project id="10164" key="RELENG">releng</project>
                    <description>&lt;p&gt;&lt;a href=&quot;https://jira.opendaylight.org/secure/ViewProfile.jspa?name=jluhrsen&quot; class=&quot;user-hover&quot; rel=&quot;jluhrsen&quot;&gt;jluhrsen&lt;/a&gt; mentioned that CSIT jobs are often failing to come online and failing job builds; for example, see the history of &lt;span class=&quot;error&quot;&gt;&amp;#91;0&amp;#93;&lt;/span&gt;. It seems to be failing often enough.&lt;/p&gt;


&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;0&amp;#93;&lt;/span&gt; &lt;a href=&quot;https://jenkins.opendaylight.org/releng/user/jluhrsen/my-views/view/netvirt%20csit/job/netvirt-csit-1node-openstack-ocata-upstream-stateful-snat-conntrack-oxygen/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jenkins.opendaylight.org/releng/user/jluhrsen/my-views/view/netvirt%20csit/job/netvirt-csit-1node-openstack-ocata-upstream-stateful-snat-conntrack-oxygen/&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="29040">RELENG-75</key>
            <summary>Heat scripts fail to bring nodes online</summary>
                <type id="10001" iconUrl="https://jira.opendaylight.org/images/icons/issuetypes/story.svg">Story</type>
                                            <priority id="3" iconUrl="https://jira.opendaylight.org/images/icons/priorities/major.svg">Medium</priority>
                        <status id="5" iconUrl="https://jira.opendaylight.org/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10000">Done</resolution>
                                        <assignee username="zxiiro">Thanh Ha (zxiiro)</assignee>
                                    <reporter username="zxiiro">Thanh Ha (zxiiro)</reporter>
                        <labels>
                    </labels>
                <created>Thu, 4 Jan 2018 20:28:48 +0000</created>
                <updated>Fri, 12 Jan 2018 15:58:22 +0000</updated>
                            <resolved>Fri, 12 Jan 2018 15:58:22 +0000</resolved>
                                                                    <component>Jenkins Job Builder</component>
                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                                                                <comments>
                            <comment id="60597" author="zxiiro" created="Thu, 4 Jan 2018 20:32:02 +0000"  >&lt;p&gt;Not sure if this is the cause but Heat stack cleanup scripts currently run every 15 minutes. There&apos;s definitely a race condition that&apos;s possible here between when a stack is in progress creating while the VM is coming fully online and is passed back to the CSIT job. It&apos;s possible that the script&apos;s list of stacks contains stacks that are in this in progress state while it&apos;s coming online and deleting the stacks before it can be passed back to the CSIT job.&lt;/p&gt;

&lt;p&gt;We should re-evaluate this script and see if we can add some smarts into it to improve things.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://github.com/opendaylight/releng-builder/blob/master/jjb/opendaylight-infra-cleanup-stale-stacks.sh&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/opendaylight/releng-builder/blob/master/jjb/opendaylight-infra-cleanup-stale-stacks.sh&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="60598" author="zxiiro" created="Thu, 4 Jan 2018 22:36:14 +0000"  >&lt;p&gt;Proposed patches: &lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/66881&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/66881&lt;/a&gt;&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://git.opendaylight.org/gerrit/66884&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/66884&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Merged.&lt;/p&gt;</comment>
                            <comment id="60600" author="zxiiro" created="Fri, 5 Jan 2018 00:26:53 +0000"  >&lt;p&gt;I suspect the previous patch might not solve the problem after poking at it more deeply. I decided to additionally add &lt;a href=&quot;https://git.opendaylight.org/gerrit/66883&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/66883&lt;/a&gt; patch to improve the debug output to make it more useful. Hopefully we can get more useful info the next time it happens.&lt;/p&gt;</comment>
                            <comment id="60606" author="zxiiro" created="Fri, 5 Jan 2018 16:34:15 +0000"  >&lt;p&gt;With the additional logging we got from the stack patches last night we know what the real issue is now. The error message is:&lt;/p&gt;

&lt;p&gt;Resource CREATE failed: ResourceInError: resources.vm_0_group.resources&lt;span class=&quot;error&quot;&gt;&amp;#91;0&amp;#93;&lt;/span&gt;.resources.instance: Went to status ERROR due to &quot;Message: No valid host was found. There are not enough hosts available., Code: 500&lt;/p&gt;

&lt;p&gt;Which tells me we&apos;re using too many robot systems. Taking a look at releng/builder I noticed that we now have both 1c and 2c robot nodes each allowing 25 parallel robots to run. This is not a good idea. Unfortunately with Robot systems every job should use the same robot vm so that we can properly limit the max nodes. I think next steps here are:&lt;/p&gt;

&lt;p&gt;1. Contact the cloud provider (Done)&lt;br/&gt;
2. Switch all releng/builder jobs to the 2c robot nodes&lt;br/&gt;
3. If necessary reduce the limit of robot vms further&lt;/p&gt;
</comment>
                            <comment id="60607" author="zxiiro" created="Fri, 5 Jan 2018 16:45:43 +0000"  >&lt;p&gt;Proposed patch to move all robots to the same type &lt;a href=&quot;https://git.opendaylight.org/gerrit/66904&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/66904&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="60608" author="zxiiro" created="Fri, 5 Jan 2018 17:08:42 +0000"  >&lt;p&gt;Add a test to check that we only ever use 1 robot node &lt;a href=&quot;https://git.opendaylight.org/gerrit/66905&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://git.opendaylight.org/gerrit/66905&lt;/a&gt;. This ensures future changes do not miss this detail.&lt;/p&gt;</comment>
                            <comment id="60675" author="zxiiro" created="Fri, 12 Jan 2018 15:58:22 +0000"  >&lt;p&gt;Infra seems to be stable now so closing this off as resolved. It seems the changes we made last week made things a lot better.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                            <customfield id="customfield_11400" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10000" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i039b3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                            </customfields>
    </item>
</channel>
</rss>