Wednesday, March 13, 2019

Oh I get it

How to make the billionaires page.

download xlsx file from hurun
http://www.hurun.net/EN/Article/Details?num=24DD41EE3B19
Scroll all the way to the bottom. Use the second file, English.

make copy
open in android sheets
delete most columns on the right.
   click column header, whole column highlights.
   small dot handle appears on sides of column.
   drag over columns to right. keep about 8 columns on far left.
   click in highlighted area. select "more"
   options appear, click delete.
select 6 columns, as above, starting with name, column C.
   copy by click in highlighted, select copy.

now make CSV.
paste into Quickedit editor.
remove the top 3 lines (header lines, not names).
replace any commas with periods.
replace tabs with commas
   regex  (?m)\t to ,
select all, copy.

add line numbers for rank.
paste into this website
https://www.online-utility.org/text/grep.jsp
regex .*
check the box to add line numbers
click process text button.
select and copy all text.

paste into quickedit
replace each colon-space (": ") with a comma
remove extra lines from web page at top and bottom.
the content is now CSV

save this as a working copy. if you mess up the next step, come back here.

once again, copy text to a new file in quick edit.
save this, but change the encoding to UTF-16BE. this is important: in the default encoding, the next regex will mess up some non-Latin characters.
regex search:
   (?m)^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),([^,]+)$
replace:
<a c="$6" href="http://www.google.com/search?q=$2 billionaire" n="$2" r="$1"><span class="big">$3</span> $2</a> $6 $4 $5

this turns the file into HTML links.

now you need the supporting html, css, and JavaScript.

<script>
// Shared sort state for the buttons below.
var sortby = 'n',    // key passed to the most recent sortOf() call
    prevOrder = 'r'; // last 'r' or 'n' sort applied; sortOf('c') reads it to order entries within a country

/**
 * Flip the current on-screen order of the list entries.
 * Collects the entries with getPRElist(), reverses that array in place
 * and passes it to applyChanges() so the page is redrawn in the new order.
 */
function reverseList() {
  var entries = getPRElist();
  applyChanges(entries.reverse());
}

/**
 * Sort the list entries by one of their attributes and redraw the list.
 *
 * @param {string} w - sort key:
 *     'n' name (plain string comparison of the "n" attribute),
 *     'r' rank (numeric comparison of the "r" attribute),
 *     'c' country ("c" attribute; entries of the same country keep the
 *         order of the last rank/name sort recorded in prevOrder),
 *     'l' last word of the name, ignoring trailing whitespace and a
 *         " & Family" suffix.
 *     Any other value is compared as a plain string attribute of that name.
 */
function sortOf(w) {
  sortby = w;
  var itemsArr = getPRElist();

  // Attribute value as a string; a missing attribute is treated as ''.
  function attr(el, key) {
    var value = el.getAttribute(key);
    return value === null || value === undefined ? '' : String(value);
  }

  // Code-unit string comparison returning -1, 0 or 1.
  function compareText(x, y) {
    if (x === y) {
      return 0;
    }
    return x > y ? 1 : -1;
  }

  // Rank attribute parsed as a base-10 integer.
  function rankOf(el) {
    return parseInt(attr(el, 'r'), 10);
  }

  // Last space-separated word of the name attribute.
  function lastNameOf(el) {
    var name = attr(el, 'n').replace(/\s+$/, '').replace(' & Family', '');
    // "+ 1" drops the separating space, so one-word names (lastIndexOf
    // returns -1, giving index 0) are compared on the same footing as
    // multi-word names instead of always sorting after them.
    return name.slice(name.lastIndexOf(' ') + 1);
  }

  itemsArr.sort(function (a, b) {
    if (sortby === 'r') {
      return rankOf(a) - rankOf(b);
    }
    if (sortby === 'l') {
      return compareText(lastNameOf(a), lastNameOf(b));
    }
    if (sortby === 'c') {
      var byCountry = compareText(attr(a, 'c'), attr(b, 'c'));
      if (byCountry !== 0) {
        return byCountry;
      }
      // Same country: fall back to the previous ordering. Ranks must be
      // compared as numbers so that 10 comes after 2.
      if (prevOrder === 'r') {
        return rankOf(a) - rankOf(b);
      }
      return compareText(attr(a, prevOrder), attr(b, prevOrder));
    }
    return compareText(attr(a, sortby), attr(b, sortby));
  });

  // Only rank and name sorts are remembered as the tie-break order.
  if (sortby === 'r' || sortby === 'n') {
    prevOrder = sortby;
  }

  applyChanges(itemsArr);
}

/**
 * Append a newline to every text node that is a direct child of the list
 * container (the first <small> inside the first <pre> on the page).
 * Before doing so it shows the container's child-node count in an alert,
 * as the original did. Does nothing if the container is not on the page.
 */
function addSpace() {
  var pre = document.getElementsByTagName('pre')[0];
  var container = pre ? pre.getElementsByTagName('small')[0] : undefined;
  if (!container) {
    return;
  }

  var nodes = container.childNodes;
  alert(nodes.length);

  // Loop counter is declared locally so it no longer leaks into (or
  // collides with) a global "i" shared with the other list functions.
  for (var i = 0; i < nodes.length; ++i) {
    if (nodes[i].nodeName.toLowerCase() === '#text') {
      nodes[i].nodeValue += '\n';
    }
  }
}

/**
 * Randomly reorder an array in place (Fisher-Yates) and return it.
 *
 * @param {Array} array - the array to shuffle; it is modified directly.
 * @returns {Array} the same array instance, now shuffled.
 */
function shuffle(array) {
  // Walk from the last slot down to the first, swapping each slot with a
  // randomly chosen slot at or below it.
  for (var last = array.length - 1; last >= 0; last--) {
    var pick = Math.floor(Math.random() * (last + 1));
    var held = array[last];
    array[last] = array[pick];
    array[pick] = held;
  }
  return array;
}

/**
 * Collect the list entries: every <a> that is a direct child of the first
 * <small> inside the first <pre> on the page.
 *
 * As a side effect, the text node that directly follows each <a> is moved
 * inside that <a> as its last child, so the text travels with the link
 * when the entries are reordered. If an <a> has no following text node
 * (it is the last child, or the next node is another element), an empty
 * text node is appended instead so that every returned entry ends with a
 * trailing text node.
 *
 * @returns {Array} the <a> elements in current page order; an empty array
 *     when the container is not on the page.
 */
function getPRElist() {
  var itemsArr = [];
  var pre = document.getElementsByTagName('pre')[0];
  var container = pre ? pre.getElementsByTagName('small')[0] : undefined;
  if (!container) {
    return itemsArr;
  }

  // childNodes is live: moving a text node into its <a> removes it from
  // this list, so the next index already points at the following node.
  var nodes = container.childNodes;
  for (var i = 0; i < nodes.length; ++i) {
    var node = nodes[i];
    if (node.nodeName.toLowerCase() !== 'a') {
      continue;
    }
    var next = nodes[i + 1];
    if (next && next.nodeName.toLowerCase() === '#text') {
      node.appendChild(next);
    } else {
      // Keep the "last child is the trailing text" shape even when there
      // is no text to capture, so the link's own label is never mistaken
      // for it later.
      node.appendChild(document.createTextNode(''));
    }
    itemsArr.push(node);
  }
  return itemsArr;
}

/**
 * Put the list entries into a random order and redraw the list:
 * getPRElist() -> shuffle() -> applyChanges().
 */
function shuffleList() {
  applyChanges(shuffle(getPRElist()));
}

/**
 * Redraw the list in the order given by itemsArr.
 *
 * Each entry is re-appended to the list container (the first <small>
 * inside the first <pre> on the page). Appending a node that is already
 * in the container moves it to the end, so appending every entry in turn
 * leaves them in array order. The entry's last child is detached first
 * and re-appended right after the entry.
 *
 * @param {Array} itemsArr - entry elements in the desired order, as
 *     returned by getPRElist().
 */
function applyChanges(itemsArr) {
  var pre = document.getElementsByTagName('pre')[0];
  var container = pre ? pre.getElementsByTagName('small')[0] : undefined;
  if (!container) {
    return;
  }

  for (var i = 0; i < itemsArr.length; ++i) {
    var entry = itemsArr[i];
    // An entry without children has nothing to detach; removeChild(null)
    // would throw.
    var trailing = entry.lastChild ? entry.removeChild(entry.lastChild) : null;
    container.appendChild(entry);
    if (trailing) {
      container.appendChild(trailing);
    }
  }
}

</script>

<style>
/* Links that have already been visited. */
a:visited {
  color: red;
  font-weight: lighter;
}
/* Control buttons. A class rather than an id, because several buttons share this style and an id must be unique in a page. */
.btn {
  margin: 10px;
  border-radius: 20%;
  opacity: 1.0;
  background-color: white;
}
/* Wraps the $3 column inside each generated link. */
span.big {
  font-size: 150%;
}
/* Give every link a minimum width. */
a {
  display: inline-block;
  min-width: 175px;
}
</style>
<br />
<small>
</small>
<br />
<!-- Fixed-position column of control buttons; each one calls a function from the script. -->
<div style="float: right; margin: -27% 27% 0 0;">
<div style="position: fixed;">
<div>
<small><button class="btn" onclick="reverseList()" type="button">reverse</button>
</small></div>
<div>
<small><button class="btn" onclick="sortOf('n')" type="button">Name</button>
</small></div>
<div>
<small><button class="btn" onclick="sortOf('r')" type="button">Rank</button>
</small></div>
<div>
<small><button class="btn" onclick="sortOf('c')" type="button">Country</button>
</small></div>
<div>
<small><button class="btn" onclick="addSpace()" type="button">add space</button>
</small></div>
<div>
<small><button class="btn" onclick="sortOf('l')" type="button">last name</button>
</small></div>
<div>
<small><button class="btn" onclick="shuffleList()" type="button">shuffle</button>
</small></div>
</div>
</div>
<br />
<!-- The script works on the first "small" element inside the first "pre" element of the page. -->
<pre style="white-space: pre-wrap;"><small>
 (PUT HTML LINKS HERE) </small>
</pre>

Ok, now put it all together.
I used blogger as a web page builder.
Copy and paste the code stuff above into a blogger post.
Then copy and paste the HTML links between the
<small>
 (PUT HTML LINKS HERE)</small>

tags near the bottom, replacing the (PUT HTML LINKS HERE) placeholder.
Click Publish button.

If you get a publish error that says there was a problem but gives no details, it probably worked. Click Close; it will complain. Be brave and close anyway.
You may need to refresh the posts page in Blogger to see your new post.
Click view. 

If you get a Publish error that shows an HTML error, either you or I made a mistake. See if you can fix it. 

No comments:

Post a Comment

more billionaires

    billionaires listing