Python Script: Scrape data from ESXi hosts

I’m currently integrating about a dozen ESXi hosts into a vCenter server. Before vCenter, these hosts were completely standalone and managed individually. Even the simple task of finding which host a VM was running on was difficult and time consuming.

So I wrote a script, primarily to list all VMs running on our hosts, but also to pull down some relevant and useful information on each VM/host. The output is to an XLSX file, the below screenshots are a sample of the information gathered:

The code is below. It’s messy, a bit hacky, and has some unnecessary duplication in there, but it does the job. I have this running as a daily tasks, and continues to be useful until we have all our hosts under vCenter. It has some limitations of course to be aware of:

  • It presumes you are using the same username/password for all your ESXi hosts
  • You need to manually maintain the ESXi host list in the script. If the script attempts to connect to an IP/hostname that doesn’t exist, the script will simply fail.
  • The script will not run successfully if the destination output file is already open.

# Script to read list of VMs and stats from each ESXi host and generate an Excel sheet.
# Hosts need to be updated manually.
# Defaultroot - 16/07/2019

import xlsxwriter
from pyvim import connect
import win32com.client as win32
from time import sleep
from math import pow, ceil

# Specify ESXi host(s) username and password
host_username = 'root'
host_password = 'password'

# Set filepath of XLSX file
filepath = r'C:\VMs.xlsx'

# List of ESXi hosts, can be DNS or IP
hosts = ['172.26.21.52',
         '172.26.21.53',
         '172.26.21.73',
         '172.26.21.103',
         '172.26.21.80',
         '172.26.21.81',
         '172.26.21.82',
         ]

# Create spreadsheet and set bold format for header
workbook = xlsxwriter.Workbook(filepath)
worksheet_vms = workbook.add_worksheet(name='VMs')
worksheet_hosts = workbook.add_worksheet(name='Hosts')
bold = workbook.add_format({'bold': True})

# Headers for first row
vm_header = ('Name',
          'ESX Host',
          'Power',
          'Hostname',
          'IP Address',
          'Operating System',
          'Notes',
          'Last Boot',
          'VMware Tools'
             )

# Headers for first row
hw_header = ('Host',
             'Version',
             'Build',
             'API Version',
             'License Version',
             'Vendor',
             'Model',
             'UUID',
             'CPU Model',
             'CPU Mhz',
             'CPU Count',
             'Core Count',
             'Thread Count',
             'Memory GB',
             'NICs',
             'Serial Number'
             )

vm_row = 0
hw_row = 0
vm_col = 0
hw_col = 0

# Write headers
for item in vm_header:
    worksheet_vms.write(vm_row, vm_col, item, bold)
    vm_col += 1

for item in hw_header:
    worksheet_hosts.write(hw_row, hw_col, item, bold)
    hw_col += 1


# Set row to start after headers
vm_row = 1
hw_row = 1

for host in hosts:

    # Open connection to ESXi host
    try:
        si = connect.SmartConnectNoSSL(host=host, user=host_username, pwd=host_password)
        print("Connected to {}".format(host))
    except:
        print("Failed to connect to {}".format(host))

    inv = si.RetrieveContent()
    dc1 = inv.rootFolder.childEntity[0]
    vmList = dc1.vmFolder.childEntity


    for vm in vmList:

        # More details can be accessed from vm.summary, but these are the most relevant
        powerState = vm.summary.runtime.powerState
        bootTime = str(vm.summary.runtime.bootTime)
        #maxCpuUsage = vm.summary.runtime.maxCpuUsage
        #maxMemoryUsage = vm.summary.runtime.maxMemoryUsage
        #paused = vm.summary.runtime.paused
        #snapshotInBackground = vm.summary.runtime.snapshotInBackground
        toolsStatus = vm.summary.guest.toolsStatus
        hostName = vm.summary.guest.hostName
        ipAddress = vm.summary.guest.ipAddress
        name = vm.summary.config.name
        #vmPathName = vm.summary.config.vmPathName
        #memorySizeMB = vm.summary.config.memorySizeMB
        #numEthernetCards = vm.summary.config.numEthernetCards
        #numVirtualDisks = vm.summary.config.numVirtualDisks
        #guestId = vm.summary.config.guestId
        guestFullName = vm.summary.config.guestFullName
        annotation = vm.summary.config.annotation

        # Add data to list so it can be written to file
        data = (name,
                host,
                powerState,
                hostName,
                ipAddress,
                guestFullName,
                annotation,
                bootTime,
                toolsStatus
        )

        vm_col = 0

        # Write data to row
        for x in data:
            worksheet_vms.write(vm_row, vm_col, x)
            vm_col += 1

        vm_row += 1

    vm_col = 0


    content = si.RetrieveContent()
    hw = si.content.rootFolder.childEntity[0].hostFolder.childEntity[0].host[0]

    # More details can be accessed from vm.summary, but these are the most relevant
    fullname = content.about.fullName
    version = content.about.version
    build = content.about.build
    apiVersion = content.about.apiVersion
    license = content.about.licenseProductVersion
    vendor = hw.summary.hardware.vendor
    model = hw.summary.hardware.model
    uuid = hw.summary.hardware.uuid
    cpuModel = hw.summary.hardware.cpuModel
    cpuMhz = hw.summary.hardware.cpuMhz
    numCpuPkgs = hw.summary.hardware.numCpuPkgs
    numCpuCores = hw.summary.hardware.numCpuCores
    numCpuThreads = hw.summary.hardware.numCpuThreads
    memorySize = ceil(hw.summary.hardware.memorySize / pow(1024, 3))
    numNics = hw.summary.hardware.numNics
    try:
        sn = hw.summary.hardware.otherIdentifyingInfo[1].identifierValue
    except:
        sn = "Unknown"

    # Add data to list so it can be written to file
    data = (host,
            version,
            build,
            apiVersion,
            license,
            vendor,
            model,
            uuid,
            cpuModel,
            cpuMhz,
            numCpuPkgs,
            numCpuCores,
            numCpuThreads,
            memorySize,
            numNics,
            sn
            )

    hw_col = 0

    # Write data to row
    for x in data:
        worksheet_hosts.write(hw_row, hw_col, x)
        hw_col += 1

    hw_row += 1

    # Disconnect from ESXi host
    connect.Disconnect(si)

workbook.close()

# Wait for file to be written
sleep(2)

# Some formatting on the created file so that cells are autofit size
excel = win32.gencache.EnsureDispatch('Excel.Application')
wb = excel.Workbooks.Open(filepath)
ws_vm = wb.Worksheets("VMs")
ws_hw = wb.Worksheets("Hosts")
ws_vm.Columns.AutoFit()
ws_hw.Columns.AutoFit()
wb.Save()
excel.Application.Quit()

print("Complete")

Leave a Reply

Your email address will not be published. Required fields are marked *