readout dynamic web page content

hw-schrauber

New member
Hi Forum,

I would like to read out a variable from my solar system's website. There is no API. When I call the site via my local IP “www.xxx.yyy.zzz/status.html”, I get a webpage with the following dynamic source code:

.
.
.
width:16px;
text-align:center;
}
</style>
<script type="text/javascript">
var height=0;function fileText(id,value){if(document.getElementById(id)){document.getElementById(id).innerHTML=value}}function changeFont(){reCon("main_div").style.fontFamily=window.parent.reFont()}function child_getH(){var nh=document.body.offsetHeight+100;if(nh<500||nh==null){nh=500}if(height!=nh){height=nh;window.parent.child_height(height)}}function reCon(id){return document.getElementById(id)}function ready(){try{window.parent.show_ifr()}catch(e){}child_getH()}function show(v){var c=document.getElementById(v);if(c!=null){c.style.display=""}}function hide(v){var c=document.getElementById(v);if(c!=null){c.style.display="none"}};
</script>
<script type="text/javascript">
var webdata_sn = "2212065874 ";
var webdata_msvn = " ";
var webdata_ssvn = " ";
var webdata_pv_type = "";
var webdata_rate_p = "";
var webdata_now_p = "27";
var webdata_today_e = "0.0";
var webdata_total_e = "1.2";
var webdata_alarm = "";
var webdata_utime = "0";
var cover_mid = "4197332755";
var cover_ver = "MW3_16U_5406_1.57";
var cover_wmode = "APSTA";
var cover_ap_ssid = "AP_2212065874";
var cover_ap_ip = "10.10.100.254";
var cover_ap_mac = "EC:FD:F8:90:4E:7E";
var cover_sta_ssid = "Airrunner";
var cover_sta_rssi = "57%";
var cover_sta_ip = "192.168.50.229";
var cover_sta_mac = "E8:FD:F8:90:4E:7E";
var status_a = "1";
var status_b = "0";
var status_c = "0";

function initPageText(){var list=window.parent.reList("status");fileText("st1",list["t1"]);fileText("st2",list["t2"]);fileText("st3",list["t3"]);for(var i=1;i<=27;i++){if(i!=14){fileText("tx"+i,list)}}changeFont();child_getH()}function upfold(v){if(document.getElementById("up_"+v+"_div").style.display=="none"){show("up_"+v+"_div");reCon("p_"+v).innerHTML="-"}else{hide("up_"+v+"_div");reCon("p_"+v).innerHTML="+"}}function init_main_page(){var on=window.parent.reTip("1");var off=window.parent.reTip("2");document.getElementById("cover_mid").innerHTML=cover_mid;document.getElementById("cover_ver").innerHTML=cover_ver;document.getElementById("cover_ap_status").innerHTML=off;document.getElementById("cover_sta_status").innerHTML=off;if(cover_wmode!="STA"){document.getElementById("cover_ap_status").innerHTML=on;document.getElementById("cover_ap_ssid").innerHTML=cover_ap_ssid;document.getElementById("cover_ap_ip").innerHTML=cover_ap_ip;document.getElementById("cover_ap_mac").innerHTML=cover_ap_mac}if(cover_wmode!="AP"){document.getElementById("cover_sta_status").innerHTML=on;document.getElementById("cover_sta_ssid").innerHTML=cover_sta_ssid;document.getElementById("cover_sta_rssi").innerHTML=cover_sta_rssi;document.getElementById("cover_sta_ip").innerHTML=cover_sta_ip;document.getElementById("cover_sta_mac").innerHTML=cover_sta_mac}if(webdata_sn==""){webdata_sn="---"}fileText("webdata_sn",webdata_sn);if(webdata_msvn==""){webdata_msvn="---"}fileText("webdata_msvn",webdata_msvn);if(webdata_ssvn==""){webdata_ssvn="---"}fileText("webdata_ssvn",webdata_ssvn);if(webdata_pv_type==""){webdata_pv_type="---"}fileText("webdata_pv_type",webdata_pv_type);if(webdata_rate_p==""){webdata_rate_p="---"}fileText("webdata_rate_p",webdata_rate_p+" W");if(webdata_now_p==""||webdata_now_p==0){webdata_now_p="---"}fileText("webdata_now_p",webdata_now_p+" W");if(webdata_today_e==""){webdata_today_e="---"}fileText("webdata_today_e",webdata_today_e+" 
kWh");if(webdata_total_e==""){webdata_total_e="---"}fileText("webdata_total_e",webdata_total_e+" kWh");if(webdata_alarm==""){webdata_alarm="---"}fileText("webdata_alarm",webdata_alarm);if(webdata_utime==""){if(document.getElementById("webdata_sn").innerHTML=="---"){webdata_utime="---"}else{webdata_utime=value+window.parent.reTip("5")}}fileText("webdata_utime",webdata_utime);var st_en=window.parent.reTip("3");var st_dis=window.parent.reTip("4");var st_un=window.parent.reTip("41");if(status_a=="1"){document.getElementById("cover_remote_status_a").innerHTML=st_en}else{if(status_a=="0"){document.getElementById("cover_remote_status_a").innerHTML=st_dis}else{document.getElementById("cover_remote_status_a").innerHTML=st_un}}if(status_b=="1"){document.getElementById("cover_remote_status_b").innerHTML=st_en}else{if(status_b=="0"){document.getElementById("cover_remote_status_b").innerHTML=st_dis}else{document.getElementById("cover_remote_status_b").innerHTML=st_un}}};

</script>
</head>
<body class="in_body" onload="init_main_page();">
.
.
.

What can I do to read out the value of “var webdata_now_p” into a MacroDroid variable?

Thanks in advance,
hw-schrauber
 

Jacob L

Moderator (Lawsonator)
Http request > get > URL > save to variable at the bottom.

Next action is text manipulation action > extract > source text is the variable and text to extract would be var webdata_now_p.+ if that's the site content
 

Endercraft

Moderator (& bug finder :D)
Try this macro. As an extension to @Jacob L's answer (which I missed) it will only extract the number and nothing else.
Test the trigger to... test.
 

Attachments

  • Extract_text.macro
    6.7 KB · Views: 11

hw-schrauber

New member
Thanks Jacob,
Http request > get > URL > save to variable at the bottom.
I have done that. I can see the whole page in the global variable.
Next action is text manipulation action > extract > source text is the variable and text to extract would be var webdata_now_p.+ if that's the site content
I have tried this, but I think I did something wrong. I tested it with the variables in use (source variable, start text, end text), but the new variable is still empty. Then I tested it against the beginning of the page source variable, with start text "DOCTYPE" and end text "PUBLIC". I expected "html" in the new variable, but it is still empty.

Do you have an idea?
 

Endercraft

Moderator (& bug finder :D)
@hw-schrauber check this post.

 

hw-schrauber

New member
Thanks Endercraft,

I have imported your macro (thanks for that). I also changed the variable “res” to my own variable, which I know contains the page content, but when I press the “Test” button in the dialog, the output is empty. Furthermore, I also tried changing the [] into {}, but this makes no difference either.
 

Endercraft

Moderator (& bug finder :D)
I tested it with the text you provided and it worked.
You should check if the text is indeed here.
 

hw-schrauber

New member
I wrote, that the content of the page is in the var. I can see this when I press the "test button". I see content also when I show local Var under trigger, action, conditions of a macro.
To be sure that all data are in, I have also written the content to a file. The file with the complete content I have attached.
At line 123 you find the needed var + value.
 

Attachments

  • macrodroidvariable.txt
    11.6 KB · Views: 5
  • IMG_20230904_160129.jpg
    IMG_20230904_160129.jpg
    310.5 KB · Views: 13
  • 1693837220277.jpg
    1693837220277.jpg
    143.9 KB · Views: 11
Last edited:

sampleuserhere

Active member
I copied what @Endercraft had shared and worked with the latest text. This one extracts the variables and converts them all into JSON. Set the output variable after the clear action to the output of the HTTP request.

I went a bit overboard but you can now read the rest of them at ease.
 

Attachments

  • Extract_text_vsampleuserhere.macro
    23.2 KB · Views: 6

hw-schrauber

New member
@Endercraft ,@sampleuserhere

thanks for your help.

I think I am one step further. I have imported your (sampleuserhere) and run it as is. This is how it works. But if I delete the variable "res" and then execute the http request "GET" and read the content directly into the variable "res", then in the variable "output" is exactly the same as in "res" only enclosed with {}. The JSON variable remains empty. If I look at the content of the variable "res" after the GET, then it is identical to the content of the exported file.
It looks like it makes a difference if the data is already in the variable "res" or if the GET command fills it.
 

Attachments

  • Screenshot_2023-09-05-09-22-31-959_com.arlosoft.macrodroid.jpg
    Screenshot_2023-09-05-09-22-31-959_com.arlosoft.macrodroid.jpg
    594.9 KB · Views: 5
Last edited by a moderator:

sampleuserhere

Active member
It does happen lmao, I tested by reading the file directly and I got the same result as you did. It was the regexp.
Anyway, change the first Text Manipulation regex with the following regexp.

Code:
^[\s\S]+<script type="text/javascript">([\s\S]+?);\s+function[\s\S]+$
 
Last edited:

sampleuserhere

Active member
Thanks, @sampleuserhere

do you know why you have the same result with the first try?
Your last code work perfect! :)

It was actually the regex, I must be tripping when I said #12. My bad.

Code:
^[\s\S]+?<script type="text/javascript">\s*(var webdata_sn.+?var \w+ = "[^\"]*");\s+function[\s\S]+$

The problem is .+? after webdata_sn, It should be [\S\s]+ instead.
 

Quidn

Passionate Member
It was actually the regex, I must be tripping when I said #12. My bad.

Code:
^[\s\S]+?<script type="text/javascript">\s*(var webdata_sn.+?var \w+ = "[^\"]*");\s+function[\s\S]+$

The problem is .+? after webdata_sn, It should be [\S\s]+ instead.

How about this?

((?:var[^;]+;\s+)+)

I think this will be much readable to beginners(like me) and easy to reuse.
I didn't know even what +? and [\s\S] do. I used ? as "is optional" marking only. Thank you for the tip.
 

sampleuserhere

Active member
How about this?

((?:var[^;]+;\s+)+)

I think this will be much readable to beginners(like me) and easy to reuse.
I didn't know even what +? and [\s\S] do. I used ? as "is optional" marking only. Thank you for the tip.

That's cool, either way works.

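[\S\s] matches any character, including line breaks, since \S and \s are the opposite of each other (a plain . does not match line breaks). A ? placed after a quantifier makes it non-greedy, so it matches as few characters as possible.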
 

Quidn

Passionate Member
That's cool, either way works.

[\S\s] matches anything since both are the opposite of each other. ? non-greedy, it matches less characters.

[\s\S]: That's why I confused. Didn't get why they are grouped even I'm using \s at least every few days, until Googled.
I often write very complex RegEx but never thought about group like that. This is interesting.
Maybe that's because my pattern usually separated by \s like above. (+ And below)

By the way, I realized that string extracted by my RegEx should be processed before parsed as JSON.

  • ((?:(?<!\()(?:var|let|const)[^=";\v]+=\s*(?:\d+|true|false|null|"(?:[^"\v]|(?<=\\)")*")(?:;\s+|\s*\v\s*))+)
  • (((var|let|const)[^=";\v]+=\s*(\d+|true|false|null|"([^"\v]|(?<=\\)")*")(;\s+|\s*\v\s*))+)
  • ((var[^=";\v]+=\s*(\d+|"[^"]*")(;|\v)\s+)+)
  • ((?:var[^;]+;\s+)+)

With the longer ones you don't need to ensure the values are double-quoted, but they are not so simple anymore. :(

Even though so, still not very long and might be used universally without any modification.
 

sampleuserhere

Active member
By the way, I realized that string extracted by my RegEx should be processed before parsed as JSON.

Yup, my end goal was to create JSON. I didn't think I could manage to exclude the ; on the last line with a match group, so I just went all the way. It cost only one less action, which doesn't really matter much.

At the very least OP got the job done, what I suggested will break if something other than a function is declared after the variables so he probably will come back here later, lmao.
 
Top