Thursday, June 4, 2020

Reading Excel Data from a Spreadsheet with Python

import openpyxl
import os

'''
 Adapted from https://automatetheboringstuff.com/2e/chapter13/
 Data files used can be found at this link
'''

#os.chdir('E:\\Google Drive\\python_work\\MyPythonScripts\\datafiles')
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')

print('''
Workbooks, Sheets, Cells
========================
As a quick review, here’s a rundown of all the functions, methods, and data
types involved in reading a cell out of a spreadsheet file:

1. Import the openpyxl module.
2. Call the openpyxl.load_workbook() function.
3. Get a Workbook object.
4. Use the active or sheetnames attributes.
5. Get a Worksheet object.
6. Use indexing or the cell() sheet method with row and column keyword arguments.
7. Get a Cell object.
8. Read the Cell object’s value attribute.

''')

# Create a workbook object by opening the xlsx file
workbook = openpyxl.load_workbook('example.xlsx')

print(workbook)
#<openpyxl.workbook.workbook.Workbook object at 0x000001F0E4786250>

# Is it a workbook object?
print(type(workbook))

# What sheets are in the workbook
print(workbook.sheetnames)

# Deprecated
#sheet = workbook.get_sheet_by_name('Sheet1')
# New
sheet = workbook['Sheet1']

print(type(sheet))
#<class 'openpyxl.worksheet.worksheet.Worksheet'>

# Get the sheet's title as a string.
print(sheet.title)

# Get the active sheet.
print(workbook.active)

# A cell
print(sheet['A1'])
#<Cell 'Sheet1'.A1>

# Whats the value in A1
print(sheet['A1'].value)

# Get another cell from the sheet.
c = sheet['B1']

# Whats the value in B1
print(c.value)

 # Get the row, column, and value from the cell.
print('Row %s, Column %s is %s' % (c.row, c.column, c.value))

print('Cell %s is %s' % (c.coordinate, c.value))

# Using cell()
print(sheet.cell(row=1, column=2))

# Whats the cell value at intersection of row1 coumn2
print(sheet.cell(row=1, column=2).value)
print(sheet['B1'])

# Go through every other row:
for i in range(1, 8, 2):
    print(i, sheet.cell(row=i, column=2).value)

# Get the highest row number.
print(sheet.max_row)

# Get the highest column number.
print(sheet.max_column)

# Converting between column letters and numbers
from openpyxl.utils import get_column_letter, column_index_from_string

# Translate column 1 to a letter.
print(get_column_letter(1))
print(get_column_letter(2))
print(get_column_letter(27))
print(get_column_letter(900))
print(get_column_letter(sheet.max_column))
# Get A's number.
print(column_index_from_string('A'))
# Get AA's number.
print(column_index_from_string('AA'))
print()

# Getting Rows and Columns from the Sheets
# Get all cells from the Area A1 to C8.
print(tuple(sheet['A1':'C8']))
print()
# Print the Area's cell values using two for loops:
# The outer for goes over each row in the slice;
#   The nested for loop goes through each cell in that row
for rowOfCellObjects in sheet['A1':'C8']:
    for cellObj in rowOfCellObjects:
        print(cellObj.coordinate, cellObj.value, end=' ')
    print('\n--- END OF ROW ---\n')

# Access all cell values in a particular row or column
# A Worksheet object’s rows and columns attribute must be converted to lists with the list() function
# Print second column's cells.
print(list(sheet.columns)[1])
# List all cell values in column B
print('\nList all cell values in column B')
for cellObj in list(sheet.columns)[1]:
    print(cellObj.coordinate, cellObj.value)
#
# List all cell values in row 2
print('\nList all cell values in row 2')
for cellObj in list(sheet.rows)[1]:
    print(cellObj.coordinate, cellObj.value)
       

No comments:

Post a Comment

Please add comments so I may update the material to accommodate platform modification to various commands. Also if you have some real-world caveats, do please share.

Search Duke

About the Author

My photo
Central Florida, United States