HTTP retriever regex help
-
Hi,
I would like to use an HTTP retriever data source to parse three values (for Power, Daily Yield and Total Yield) from the HTML below.
This is for an SMA Webbox, which is a fairly popular monitoring device for SMA brand inverters. Is anyone able to help out with the regex? None of my (embarrassing) attempts have worked.
Thanks!
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <title>Sunny WebBox</title> <link href="css/style.css" type="text/css" rel="stylesheet"> <link href="css/home.css" type="text/css" rel="stylesheet"> <script type="text/javascript"> <!-- top.frames[2].location.reload(); --> </script> <script type="text/javascript" src="script/json.js"></script> <script type="text/javascript" src="script/prototype.js"></script> <script type="text/javascript" src="script/traffic.js"></script> <script type="text/javascript" src="script/home.js"></script> <script type="text/javascript"> <!-- function checkFrameset() { if(self.parent.name != "mainFrame") { top.location = "index.html"; } } --> </script> </head> <body> <script type="text/javascript"> <!-- checkFrameset(); --> </script> <table class="home-layout-table" id="Table1"> <tr> <td align="center" class="tdcol0"> <img src="image/webbox.gif" alt="SunnyWebBox"> </td> <td class="tdcol1"> <table id="OvTbl" class="home-overview-table"> <tr class="evenrow"> <td class="tdcol0">Power:</td> <td id="Power" class="tdcol1">1458 W</td> </tr> <tr class="oddrow"> <td class="tdcol0">Daily yield:</td> <td id="DailyYield" class="tdcol1">10.88 kWh</td> </tr> <tr class="evenrow"> <td class="tdcol0">Total yield:</td> <td id="TotalYield" class="tdcol1">1239.21 MWh</td> </tr> </table> <form method="post" action="login" target="mainFrame"> <table class="home-control-table" id="Table3"> <tr> <td class="tdcol0">Language:</td> <td class="tdcol1" colspan="2"><select name="Language" size="1"> <option value="cs">Čeština </option> <option value="de">Deutsch </option> <option value="en" selected>English </option> <option value="es">Español </option> <option value="fr">Français </option> <option value="it">Italiano </option> <option value="nl">Nederlands </option> <option value="pt">Português </option> <option value="el">Ελληνικά </option> <option value="ko">한국어 
</option> </select> </td> </tr> <tr> <td height="5px"></td> </tr> <tr> <td class="tdcol0">Password:</td> <td class="tdcol1"><input name="Password" type="password" size="32" class="input-text"></td> </tr> <tr> <td></td> <td class="buttonrow"><input name="ButtonLogin" type="submit" size="5" class="input-submit" value="Login"></td> </tr> <tr> <td class="messagerow" colspan="2">This password was not recognised.</td> </tr> </table> </form> </td> </tr> </table> </body> </html>
-
jeremyh,
The following will pull out the value 1458 for the Power value in your example:
"<td id=\"Power\" class=\"tdcol1\">(.*) W</td>"
You should be able to change the ID string to get the other values you need.
Terry
-
Thank you very much Terrypacker. That works.
For anyone else wishing to use HTTP to retrieve these values from the SMA Webbox (if you cannot open another port to the device to use Modbus, for example), here is an export of my data source and data point configuration that you may import into Mango:
{ "dataSources":[ { "xid":"DS_261176", "name":"PLANT_NAME", "enabled":true, "type":"HTTP_RETRIEVER", "alarmLevels":{ "PARSE_EXCEPTION":"NONE", "DATA_RETRIEVAL_FAILURE":"NONE", "SET_POINT_FAILURE":"NONE" }, "purgeType":"YEARS", "updatePeriodType":"MINUTES", "quantize":false, "retries":2, "setPointUrl":"", "timeoutSeconds":5, "updatePeriods":5, "url":"http:\/\/IP_OF_WEBBOX\/home.htm", "purgeOverride":true, "purgePeriod":1 } ], "dataPoints":[ { "xid":"DP_973514", "name":"Power", "enabled":true, "loggingType":"ON_CHANGE", "intervalLoggingPeriodType":"MINUTES", "intervalLoggingType":"INSTANT", "purgeType":"YEARS", "pointLocator":{ "dataType":"NUMERIC", "ignoreIfMissing":false, "setPointName":"", "settable":false, "timeFormat":"", "timeRegex":"", "valueFormat":"", "valueRegex":"<td id=\\\"Power\\\" class=\\\"tdcol1\\\">(.*) W<\/td>" }, "eventDetectors":[ ], "plotType":"STEP", "unit":"W", "chartColour":"", "chartRenderer":null, "dataSourceXid":"DS_261176", "defaultCacheSize":1, "deviceName":"DEVICE_NAME", "discardExtremeValues":false, "discardHighLimit":0.0, "discardLowLimit":0.0, "intervalLoggingPeriod":15, "intervalLoggingSampleWindowSize":0, "overrideIntervalLoggingSamples":false, "purgeOverride":true, "purgePeriod":1, "textRenderer":{ "type":"ANALOG", "useUnitAsSuffix":true, "unit":"W", "renderedUnit":"W", "format":"####.##" }, "tolerance":0.0 }, { "xid":"DP_348506", "name":"Daily Yield", "enabled":true, "loggingType":"ON_CHANGE", "intervalLoggingPeriodType":"MINUTES", "intervalLoggingType":"INSTANT", "purgeType":"YEARS", "pointLocator":{ "dataType":"NUMERIC", "ignoreIfMissing":false, "setPointName":"", "settable":false, "timeFormat":"", "timeRegex":"", "valueFormat":"", "valueRegex":"<td id=\\\"DailyYield\\\" class=\\\"tdcol1\\\">(.*) kWh<\/td>" }, "eventDetectors":[ ], "plotType":"STEP", "unit":"kW\/h", "chartColour":"", "chartRenderer":null, "dataSourceXid":"DS_261176", "defaultCacheSize":1, "deviceName":"DEVICE_NAME", "discardExtremeValues":false, 
"discardHighLimit":0.0, "discardLowLimit":0.0, "intervalLoggingPeriod":15, "intervalLoggingSampleWindowSize":0, "overrideIntervalLoggingSamples":false, "purgeOverride":true, "purgePeriod":1, "textRenderer":{ "type":"ANALOG", "useUnitAsSuffix":true, "unit":"kW\/h", "renderedUnit":"kW\/h", "format":"####.##" }, "tolerance":0.0 }, { "xid":"DP_288031", "name":"Total Yield", "enabled":true, "loggingType":"ON_CHANGE", "intervalLoggingPeriodType":"MINUTES", "intervalLoggingType":"INSTANT", "purgeType":"YEARS", "pointLocator":{ "dataType":"NUMERIC", "ignoreIfMissing":false, "setPointName":"", "settable":false, "timeFormat":"", "timeRegex":"", "valueFormat":"", "valueRegex":"<td id=\\\"TotalYield\\\" class=\\\"tdcol1\\\">(.*) MWh<\/td>" }, "eventDetectors":[ ], "plotType":"STEP", "unit":"MW\/h", "chartColour":"", "chartRenderer":null, "dataSourceXid":"DS_261176", "defaultCacheSize":1, "deviceName":"DEVICE_NAME", "discardExtremeValues":false, "discardHighLimit":0.0, "discardLowLimit":0.0, "intervalLoggingPeriod":15, "intervalLoggingSampleWindowSize":0, "overrideIntervalLoggingSamples":false, "purgeOverride":true, "purgePeriod":1, "textRenderer":{ "type":"ANALOG", "useUnitAsSuffix":true, "unit":"MW\/h", "renderedUnit":"MW\/h", "format":"####.###" }, "tolerance":0.0 } ] }
BTW, an alternative to using Modbus on the SMA Webbox is to use RPC (which requires no setup and can run on port 80). SMA have published the specification for it here: http://files.sma.de/dl/4253/SWebBoxRPC-eng-BUS112713.pdf
When queried at a particular URL, the Webbox returns a JSON string containing all of the plant's parameters. All it needs is for someone to write a Mango module for it!
Attachment: download link