Link to Bloomberg scraping with ElectroNeek and AutoHotkey. JS injected into the address bar selects the table and copies it to the clipboard. ElectroNeek writes the clipboard contents to a .tsv file, which is good for Excel.
The clipboard contents still need a JavaScript function to fix the formatting. The alignment of the data rows with the headers is off. It might be more useful to select and copy just the table body, write the headers to Excel, save the file as .tsv, then append the body to the saved file.
Javascript injection to scrape Bloomberg
EDIT: The scrape is now a little better as far as formatting goes.
The ElectroNeek flow just runs the AHK code below from the Command Prompt activity, with a 5-second delay to keep the browser open for debugging purposes.
AutoHotkey code, originally posted without comments to save space (adjust the file save location in the FileAppend line for your system, or the file will end up in the program directory). Thanks to @teadrinker on the AutoHotkey forum for the essential parts of this code. Selenium is not required; stock Chrome is used, without the debug port.
; Auto-execute section: open the Bloomberg "Americas" world-indexes page in Chrome,
; inject JavaScript through the address bar to copy the table rows to the clipboard,
; then write a header line plus the copied rows to a tab-separated file.
SetBatchLines, -1

; JavaScript executed as a "javascript:" URL. For every '.data-table-row' element it
; replaces the first two newlines of the row text with spaces and the remaining ones
; with tabs, joins the rows with "\n" and writes the result to the clipboard once the
; document has focus. LTrim strips the source indentation, so the string handed to
; Chrome has no leading whitespace on any line. AutoHotkey comments are not recognised
; inside a continuation section by default, so none are placed inside it.
tableJS =
( LTrim
    var items = document.querySelectorAll('.data-table-row' );
    var docStr = "";
    var tmp = "";
    var res = "";
    var res1 = "";
    items.forEach(function(element){
        tmp = element.innerText;
        res1 = tmp.replace(/(\n)/m, " ");
        res1 = res1.replace(/(\n)/m, " ");
        res = res1.replace(/(\r\n|\n|\r)/gm, "\t");
        docStr += res + "\n";
    });
    (() => {
        if (window.location.protocol === 'https:') {
            document.documentElement.focus();
            const timer = setInterval(() => {
                if (document.hasFocus()) {
                    clearInterval(timer);
                    navigator.clipboard.writeText(docStr);
                }
            }, 1000);
        }
        else {
            const textArea = document.createElement('textarea');
            textArea.value = document.documentElement.outerHTML;
            textArea.wrap = 'off';
            textArea.rows = 100000;
            textArea.style.position = 'fixed';
            document.documentElement.appendChild(textArea);
            textArea.focus();
            textArea.select();
            document.execCommand('copy');
            textArea.parentNode.removeChild(textArea);
        }
    })();
)

Run, Chrome.exe https://www.bloomberg.com/markets/stocks/world-indexes/americas
Sleep, 5000    ; fixed wait for the page to load before injecting the script

; Empty the clipboard first so that ClipWait only succeeds on text produced by the
; injected script; otherwise stale clipboard content could be written to the file.
Clipboard := ""
RunJsFromChromeAddressBar(tableJS)
ClipWait, 15
if ErrorLevel {
    ; Nothing arrived on the clipboard in time: close the browser and report failure
    ; through the exit code instead of writing a header-only or stale file.
    WinKill, ahk_exe Chrome.exe
    ExitApp, 1
}
;msgbox %Clipboard%

; Normalise the copied rows to bare linefeeds and terminate the header with `n as well.
; FileAppend disables end-of-line translation when the text contains `r`n pairs, which
; would leave mixed line endings; with `n only, every line is written as CR+LF.
body := StrReplace(Clipboard, "`r`n", "`n")
table := "Name`tValue`tNet Change`t% Change`t1 Month`t1Year`tTime (EDT)`n" . body

; Relative path: adjust it for your system. FileAppend adds to an existing file, so
; repeated runs accumulate one header-plus-rows section per run.
FileAppend, %table%, ahk2.tsv
writeFailed := ErrorLevel

WinKill, ahk_exe Chrome.exe
ExitApp, % writeFailed ? 2 : 0
; Runs JavaScript in a Chrome window by writing a "javascript:" URL into the address bar
; through the accessibility (IAccessible) interface and then sending Enter.
;   js  - JavaScript source text; it is prefixed with "javascript:" before being set.
;   exe - executable name used to select the browser window (default "chrome.exe").
; Returns true when the address bar was located and Enter was sent, false when the
; window, its accessible object or the address-bar element could not be found.
RunJsFromChromeAddressBar(js, exe := "chrome.exe") {
    static WM_GETOBJECT := 0x3D
         , ROLE_SYSTEM_TEXT := 0x2A
         , STATE_SYSTEM_FOCUSABLE := 0x100000
         , SELFLAG_TAKEFOCUS := 0x1

    window := "ahk_class Chrome_WidgetWin_1 ahk_exe " . exe
    hWnd := WinExist(window)
    if !hWnd
        return false    ; no matching browser window, nothing to drive

    ; NOTE(review): sending WM_GETOBJECT to the render widget presumably prompts Chrome
    ; to expose its accessibility tree before it is queried - confirm.
    SendMessage, WM_GETOBJECT, 0, 1, Chrome_RenderWidgetHostHWND1, % window

    AccChrome := AccObjectFromWindow(hWnd)
    if !IsObject(AccChrome)
        return false

    ; The address bar is taken to be the first focusable text element in the tree.
    ; The lookup is performed on every call; nothing is cached between calls.
    AccAddrBar := SearchElement(AccChrome, {Role: ROLE_SYSTEM_TEXT, State: STATE_SYSTEM_FOCUSABLE})
    if !IsObject(AccAddrBar)
        return false    ; avoid sending Enter when the URL could not be set

    AccAddrBar.accValue(0) := "javascript:" . js
    AccAddrBar.accSelect(SELFLAG_TAKEFOCUS, 0)
    ControlSend,, {Enter}, % window, Chrome Legacy Window
    return true
}
; Depth-first search of an accessibility tree for the first element matching params.
;   parentElement - element at which the search starts (tested before its children).
;   params        - object of property/value pairs. "ChildCount" is compared with
;                   accChildCount, "State" is tested as a bit mask against accState(0),
;                   any other key K is compared with the element's accK(0) value.
; Returns the matching element, or nothing when no element in the subtree matches.
SearchElement(parentElement, params)
{
    matches := true
    for property, expected in params {
        try {
            if (property = "ChildCount") {
                if (parentElement.accChildCount != expected)
                    matches := false
            }
            else if (property = "State") {
                if !(parentElement.accState(0) & expected)
                    matches := false
            }
            else {
                if (parentElement["acc" . property](0) != expected)
                    matches := false
            }
        }
        catch
            matches := false    ; a property that cannot be read counts as a mismatch
        if !matches
            break               ; no need to test the remaining criteria
    }
    if (matches)
        return parentElement

    ; This element does not match: recurse into each child in order.
    for index, child in AccChildren(parentElement) {
        hit := SearchElement(child, params)
        if (hit)
            return hit
    }
}
; Wraps oleacc's AccessibleObjectFromWindow for a window handle.
;   hWnd     - handle of the window to query.
;   idObject - object identifier passed to the API (default 0); only the low 32 bits
;              are used. OBJID_NATIVEOM requests IDispatch, anything else IAccessible.
; Returns a COM object wrapper that owns the interface pointer, or nothing when the
; API call does not return 0.
AccObjectFromWindow(hWnd, idObject = 0) {
    static IID_IDispatch := "{00020400-0000-0000-C000-000000000046}"
         , IID_IAccessible := "{618736E0-3C3D-11CF-810C-00AA00389B71}"
         , OBJID_NATIVEOM := 0xFFFFFFF0, VT_DISPATCH := 9, F_OWNVALUE := 1
         , oleaccModule := DllCall("LoadLibrary", "Str", "oleacc", "Ptr")    ; loaded once, on first call

    idObject &= 0xFFFFFFFF

    ; Choose the interface ID to request and convert its text form to a 16-byte GUID.
    if (idObject = OBJID_NATIVEOM)
        iidText := IID_IDispatch
    else
        iidText := IID_IAccessible
    VarSetCapacity(IID, 16)
    DllCall("ole32\CLSIDFromString", "Str", iidText, "Ptr", &IID)

    hr := DllCall("oleacc\AccessibleObjectFromWindow", "Ptr", hWnd, "UInt", idObject, "Ptr", &IID, "PtrP", pAcc)
    if (hr = 0)
        return ComObject(VT_DISPATCH, pAcc, F_OWNVALUE)
}
; Returns the children of an IAccessible object.
;   Acc - COM object wrapper whose type name is "IAccessible".
; Returns an array in which object children are IAccessible wrappers (as produced by
; AccQuery) and simple children are integer child IDs, or "" when there are no
; children or accChildCount cannot be read. On an invalid argument or a failed
; AccessibleChildren call, ErrorLevel is set to a description and nothing is returned.
AccChildren(Acc) {
    static VT_DISPATCH := 9, S_OK := 0, S_FALSE := 1

    if (ComObjType(Acc, "Name") != "IAccessible") {
        ErrorLevel := "Invalid IAccessible Object"
        return
    }

    try cChildren := Acc.accChildCount
    catch
        return ""
    if !(cChildren > 0)
        return ""    ; leaf element: skip the API call, which rejects a zero count

    ; One VARIANT per child: an 8-byte header followed by two pointer-sized slots.
    variantSize := 8 + A_PtrSize*2
    VarSetCapacity(varChildren, cChildren*variantSize, 0)
    res := DllCall("oleacc\AccessibleChildren", "Ptr", ComObjValue(Acc), "Int", 0
        , "Int", cChildren, "Ptr", &varChildren, "IntP", cChildren)
    ; S_FALSE means fewer children than requested were returned; cChildren then holds
    ; the number actually obtained, so the partial result is still usable.
    if (res != S_OK && res != S_FALSE) {
        ErrorLevel := "AccessibleChildren DllCall Failed"
        return
    }

    Children := []
    Loop % cChildren {
        offset := (A_Index - 1)*variantSize
        ; vt is the 16-bit field at the start of the VARIANT; reading only those two
        ; bytes keeps the reserved bytes that follow out of the comparison.
        if (NumGet(varChildren, offset, "UShort") = VT_DISPATCH) {
            pChild := NumGet(varChildren, offset + 8, "Ptr")
            Children.Push(AccQuery(pChild))
            ObjRelease(pChild)    ; drop the reference held by the VARIANT
        }
        else {
            ; Otherwise the value is a 32-bit child ID (VT_I4).
            Children.Push(NumGet(varChildren, offset + 8, "Int"))
        }
    }
    return Children.MaxIndex() ? Children : ""
}
; Queries Acc for the IAccessible interface and wraps the resulting pointer in a COM
; object that owns it. Any error raised along the way is suppressed, in which case
; nothing is returned.
AccQuery(Acc) {
    static IID_IAccessible := "{618736e0-3c3d-11cf-810c-00aa00389b71}"
         , VT_DISPATCH := 9
         , F_OWNVALUE := 1
    try {
        pAccessible := ComObjQuery(Acc, IID_IAccessible)
        return ComObject(VT_DISPATCH, pAccessible, F_OWNVALUE)
    }
}
Regards,
burque505