import openpyxl, os
from openpyxl.styles import Font
from openpyxl.styles.colors import Color
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print(os.getcwd())
'''Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
Charts
======
OpenPyXL supports creating bar, line, scatter, and pie charts using the data in
a sheet’s cells.
1. Create a Reference object from a rectangular selection of cells.
2. Create a Series object by passing in the Reference object.
3. Create a Chart object.
4. Append the Series object to the Chart object.
5. Add the Chart object to the Worksheet object, optionally specifying which cell
should be the top-left corner of the chart.
You create Reference objects by calling the openpyxl.chart.Reference() function
and passing three arguments:
1. The Worksheet object containing your chart data.
2. A tuple of two integers, denoting the top-left row/column cell of the selection:
Note: 1 is the first row, not 0.
3. A tuple of two integers, denoting the bottom-right row/column cell of the
From top-left to bottom-right: (3, 2), (6, 4)
___|_A_|_B_|_C_|_D_|_E_
_1_|_10|___|___|___|___
_2_|_20|___|___|___|___
_3_|_30|_x_|_x_|_x_|___
_4_|_40|_x_|_x_|_x_|___
_5_|_50|_x_|_x_|_x_|___
_6_|_60|_x_|_x_|_x_|___
_7_|_70|___|___|___|___
_8_|_80|___|___|___|___
_9_|_90|___|___|___|___
10_|100|___|___|___|___
11_|___|___|___|___|___
'''
# Open blank wb, create 3 tabs, delete default tab "Sheet", set sheet 'S3' as active
# Create myFont and list worksheets in workbook
wb = openpyxl.Workbook()
wb.create_sheet(index=0, title='S1')
wb.create_sheet(index=1, title='S2')
wb.create_sheet(index=2, title='S3')
del wb['Sheet']
wb.active = wb['S3']
sheet = wb.active
myFont = Font(name='Arial', color='FF0000', size=14, bold=True)
print(wb.sheetnames)
# Create some data in cells A1-A10
for i in range(1, 11):
sheet['A' + str(i)] = (i * 10)
# 1. Create a reference object for the data in A1-A10
refObj = openpyxl.chart.Reference(sheet, min_col=1, min_row=1, max_col=1, max_row=10)
# 2. Create a series object from the reference object
seriesObj = openpyxl.chart.Series(refObj, title='First series')
# 3. Create a chart object (bar chart)
#Types of charts include:
#------------------------------
chartObj = openpyxl.chart.BarChart()
#chartObj = openpyxl.chart.LineChart()
#chartObj = openpyxl.chart.ScatterChart()
#chartObj = openpyxl.chart.PieChart()
chartObj.title = 'My Chart'
# 4. Append the series object to the chart object
chartObj.append(seriesObj)
# 5. The charts top left corner is B3
sheet.add_chart(chartObj, 'B3')
wb.save('sampleChart.xlsx')
# To reopen this file:
#wb = openpyxl.load_workbook('sampleChart.xlsx')
wb.close()
Thursday, June 4, 2020
Reading Excel Data from a Spreadsheet with Python
import openpyxl
import os
'''
Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
'''
#os.chdir('E:\\Google Drive\\python_work\\MyPythonScripts\\datafiles')
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print('''
Workbooks, Sheets, Cells
========================
As a quick review, here’s a rundown of all the functions, methods, and data
types involved in reading a cell out of a spreadsheet file:
1. Import the openpyxl module.
2. Call the openpyxl.load_workbook() function.
3. Get a Workbook object.
4. Use the active or sheetnames attributes.
5. Get a Worksheet object.
6. Use indexing or the cell() sheet method with row and column keyword arguments.
7. Get a Cell object.
8. Read the Cell object’s value attribute.
''')
# Create a workbook object by opening the xlsx file
workbook = openpyxl.load_workbook('example.xlsx')
print(workbook)
#<openpyxl.workbook.workbook.Workbook object at 0x000001F0E4786250>
# Is it a workbook object?
print(type(workbook))
# What sheets are in the workbook
print(workbook.sheetnames)
# Deprecated
#sheet = workbook.get_sheet_by_name('Sheet1')
# New
sheet = workbook['Sheet1']
print(type(sheet))
#<class 'openpyxl.worksheet.worksheet.Worksheet'>
# Get the sheet's title as a string.
print(sheet.title)
# Get the active sheet.
print(workbook.active)
# A cell
print(sheet['A1'])
#<Cell 'Sheet1'.A1>
# Whats the value in A1
print(sheet['A1'].value)
# Get another cell from the sheet.
c = sheet['B1']
# Whats the value in B1
print(c.value)
# Get the row, column, and value from the cell.
print('Row %s, Column %s is %s' % (c.row, c.column, c.value))
print('Cell %s is %s' % (c.coordinate, c.value))
# Using cell()
print(sheet.cell(row=1, column=2))
# Whats the cell value at intersection of row1 coumn2
print(sheet.cell(row=1, column=2).value)
print(sheet['B1'])
# Go through every other row:
for i in range(1, 8, 2):
print(i, sheet.cell(row=i, column=2).value)
# Get the highest row number.
print(sheet.max_row)
# Get the highest column number.
print(sheet.max_column)
# Converting between column letters and numbers
from openpyxl.utils import get_column_letter, column_index_from_string
# Translate column 1 to a letter.
print(get_column_letter(1))
print(get_column_letter(2))
print(get_column_letter(27))
print(get_column_letter(900))
print(get_column_letter(sheet.max_column))
# Get A's number.
print(column_index_from_string('A'))
# Get AA's number.
print(column_index_from_string('AA'))
print()
# Getting Rows and Columns from the Sheets
# Get all cells from the Area A1 to C8.
print(tuple(sheet['A1':'C8']))
print()
# Print the Area's cell values using two for loops:
# The outer for goes over each row in the slice;
# The nested for loop goes through each cell in that row
for rowOfCellObjects in sheet['A1':'C8']:
for cellObj in rowOfCellObjects:
print(cellObj.coordinate, cellObj.value, end=' ')
print('\n--- END OF ROW ---\n')
# Access all cell values in a particular row or column
# A Worksheet object’s rows and columns attribute must be converted to lists with the list() function
# Print second column's cells.
print(list(sheet.columns)[1])
# List all cell values in column B
print('\nList all cell values in column B')
for cellObj in list(sheet.columns)[1]:
print(cellObj.coordinate, cellObj.value)
#
# List all cell values in row 2
print('\nList all cell values in row 2')
for cellObj in list(sheet.rows)[1]:
print(cellObj.coordinate, cellObj.value)
import os
'''
Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
'''
#os.chdir('E:\\Google Drive\\python_work\\MyPythonScripts\\datafiles')
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print('''
Workbooks, Sheets, Cells
========================
As a quick review, here’s a rundown of all the functions, methods, and data
types involved in reading a cell out of a spreadsheet file:
1. Import the openpyxl module.
2. Call the openpyxl.load_workbook() function.
3. Get a Workbook object.
4. Use the active or sheetnames attributes.
5. Get a Worksheet object.
6. Use indexing or the cell() sheet method with row and column keyword arguments.
7. Get a Cell object.
8. Read the Cell object’s value attribute.
''')
# Create a workbook object by opening the xlsx file
workbook = openpyxl.load_workbook('example.xlsx')
print(workbook)
#<openpyxl.workbook.workbook.Workbook object at 0x000001F0E4786250>
# Is it a workbook object?
print(type(workbook))
# What sheets are in the workbook
print(workbook.sheetnames)
# Deprecated
#sheet = workbook.get_sheet_by_name('Sheet1')
# New
sheet = workbook['Sheet1']
print(type(sheet))
#<class 'openpyxl.worksheet.worksheet.Worksheet'>
# Get the sheet's title as a string.
print(sheet.title)
# Get the active sheet.
print(workbook.active)
# A cell
print(sheet['A1'])
#<Cell 'Sheet1'.A1>
# Whats the value in A1
print(sheet['A1'].value)
# Get another cell from the sheet.
c = sheet['B1']
# Whats the value in B1
print(c.value)
# Get the row, column, and value from the cell.
print('Row %s, Column %s is %s' % (c.row, c.column, c.value))
print('Cell %s is %s' % (c.coordinate, c.value))
# Using cell()
print(sheet.cell(row=1, column=2))
# Whats the cell value at intersection of row1 coumn2
print(sheet.cell(row=1, column=2).value)
print(sheet['B1'])
# Go through every other row:
for i in range(1, 8, 2):
print(i, sheet.cell(row=i, column=2).value)
# Get the highest row number.
print(sheet.max_row)
# Get the highest column number.
print(sheet.max_column)
# Converting between column letters and numbers
from openpyxl.utils import get_column_letter, column_index_from_string
# Translate column 1 to a letter.
print(get_column_letter(1))
print(get_column_letter(2))
print(get_column_letter(27))
print(get_column_letter(900))
print(get_column_letter(sheet.max_column))
# Get A's number.
print(column_index_from_string('A'))
# Get AA's number.
print(column_index_from_string('AA'))
print()
# Getting Rows and Columns from the Sheets
# Get all cells from the Area A1 to C8.
print(tuple(sheet['A1':'C8']))
print()
# Print the Area's cell values using two for loops:
# The outer for goes over each row in the slice;
# The nested for loop goes through each cell in that row
for rowOfCellObjects in sheet['A1':'C8']:
for cellObj in rowOfCellObjects:
print(cellObj.coordinate, cellObj.value, end=' ')
print('\n--- END OF ROW ---\n')
# Access all cell values in a particular row or column
# A Worksheet object’s rows and columns attribute must be converted to lists with the list() function
# Print second column's cells.
print(list(sheet.columns)[1])
# List all cell values in column B
print('\nList all cell values in column B')
for cellObj in list(sheet.columns)[1]:
print(cellObj.coordinate, cellObj.value)
#
# List all cell values in row 2
print('\nList all cell values in row 2')
for cellObj in list(sheet.rows)[1]:
print(cellObj.coordinate, cellObj.value)
Excel Spreadsheet font manipulation with Python
import openpyxl, os
from openpyxl.styles import Font
from openpyxl.styles.colors import Color
'''
Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
Customizing the Font Style of Cells
Import the Font() function from the openpyxl.styles module.
Table 13-2: Keyword Arguments for Font Objects
==============================================
Keyword argument Data type Description
---------------- --------- -----------
name String The font name, such as 'Calibri' or 'Times New Roman'
size Integer The point size
bold Boolean True, for bold font
italic Boolean True, for italic font
Call Font() to create a Font object and store that Font object in a variable.
Then assign that variable to a Cell object’s font attribute.
'''
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print(os.getcwd())
wb = openpyxl.Workbook() # creates a WB with a single sheet called 'Sheet'
wb.create_sheet() # Creates a second shhet called 'Sheet1'
print(wb.sheetnames)
wb.create_sheet(index=0, title='S1')
wb.create_sheet(index=1, title='S2')
wb.create_sheet(index=2, title='S3')
print(wb.sheetnames)
# Deleting worksheets
del wb['Sheet']
del wb['Sheet1']
try:
del wb['First']
except KeyError:
print('Worksheet "First" does not exist')
print(wb.sheetnames)
### Font Objects
sheet = wb['S3']
# Create a font object.
italic24Font = Font(size=24, italic=True)
# Apply the font to A1.
sheet['A1'].font = italic24Font
sheet['A1'] = 'Hello!'
#
# Add a value to cell A2
sheet['A2'] = 'Bye'
# Create a font object.
myFont = Font(name='New Courier', strike=True, color='CC0000', size=16, italic=False, shadow=True, bold=True)
# Apply the font to A2.
sheet['A2'].font = myFont
sheet['A2'] = 'This is my font'
#
# Create a font object.
myFont = Font(name='New Courier', color='00FF00', size=10, shadow=True, bold=True)
# Add a value and apply the font to A3.
sheet['A3'] = 'This is my other font'
sheet['A3'].font = myFont
sheet = wb['S1']
# Set the Area's cell font values using two for loops:
# The outer for goes over each row in the slice;
# The nested for loop goes through each cell in that row,
# sets the font to myFont
# and sets the value to the cells coordinate name (.coordinate)
for rowOfCellObjects in sheet['A1':'C4']:
for cellObj in rowOfCellObjects:
cellObj.font = myFont
cellObj.value = str(cellObj.coordinate)
print(wb.active)
wb.save('styles.xlsx')
wb.close()
from openpyxl.styles import Font
from openpyxl.styles.colors import Color
'''
Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
Customizing the Font Style of Cells
Import the Font() function from the openpyxl.styles module.
Table 13-2: Keyword Arguments for Font Objects
==============================================
Keyword argument Data type Description
---------------- --------- -----------
name String The font name, such as 'Calibri' or 'Times New Roman'
size Integer The point size
bold Boolean True, for bold font
italic Boolean True, for italic font
Call Font() to create a Font object and store that Font object in a variable.
Then assign that variable to a Cell object’s font attribute.
'''
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print(os.getcwd())
wb = openpyxl.Workbook() # creates a WB with a single sheet called 'Sheet'
wb.create_sheet() # Creates a second shhet called 'Sheet1'
print(wb.sheetnames)
wb.create_sheet(index=0, title='S1')
wb.create_sheet(index=1, title='S2')
wb.create_sheet(index=2, title='S3')
print(wb.sheetnames)
# Deleting worksheets
del wb['Sheet']
del wb['Sheet1']
try:
del wb['First']
except KeyError:
print('Worksheet "First" does not exist')
print(wb.sheetnames)
### Font Objects
sheet = wb['S3']
# Create a font object.
italic24Font = Font(size=24, italic=True)
# Apply the font to A1.
sheet['A1'].font = italic24Font
sheet['A1'] = 'Hello!'
#
# Add a value to cell A2
sheet['A2'] = 'Bye'
# Create a font object.
myFont = Font(name='New Courier', strike=True, color='CC0000', size=16, italic=False, shadow=True, bold=True)
# Apply the font to A2.
sheet['A2'].font = myFont
sheet['A2'] = 'This is my font'
#
# Create a font object.
myFont = Font(name='New Courier', color='00FF00', size=10, shadow=True, bold=True)
# Add a value and apply the font to A3.
sheet['A3'] = 'This is my other font'
sheet['A3'].font = myFont
sheet = wb['S1']
# Set the Area's cell font values using two for loops:
# The outer for goes over each row in the slice;
# The nested for loop goes through each cell in that row,
# sets the font to myFont
# and sets the value to the cells coordinate name (.coordinate)
for rowOfCellObjects in sheet['A1':'C4']:
for cellObj in rowOfCellObjects:
cellObj.font = myFont
cellObj.value = str(cellObj.coordinate)
print(wb.active)
wb.save('styles.xlsx')
wb.close()
Excel Spreadsheet Manipulation with ord() and chr() in Python
import openpyxl, os
from openpyxl.styles import Font
from openpyxl.styles.colors import Color
'''
Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
Formulas
========
Excel formulas can configure cells to contain values calculated from other cells.
>>> sheet['B9'] = '=SUM(B1:B8)'
This will store =SUM(B1:B8) as the value in cell B9.
'''
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print(os.getcwd())
wb = openpyxl.Workbook() # creates a WB with a single default sheet called 'Sheet'
wb.create_sheet(index=0, title='S1')
wb.create_sheet(index=1, title='S2')
wb.create_sheet(index=2, title='S3')
# Deleting default worksheet
try:
del wb['Sheet']
print('Removed Worksheet "Sheet"')
except KeyError:
print('Worksheet "Sheet" does not exist')
print(wb.sheetnames)
myFont = Font(name='Arial', color='FF0000', size=14, bold=True)
# Set sheet 'S3' as active worksheet
wb.active = wb['S3']
print(wb.active)
# Manipulate sheet 'S3'
sheet = wb.active
sheet['A1'] = 200
sheet['A2'] = 300
# Set the formula.
sheet['A3'] = '=SUM(A1:A2)'
print(sheet['A3'].value)
# Set sheet 'S2' as active worksheet
wb.active = wb['S2']
print(wb.active)
sheet = wb.active
'''
# Set the Area's cell values using two for loops:
# The outer for goes over each row in the slice;
# The nested for loop goes through each cell in that row,
# sets the font to myFont
# and sets the value to val
'''
for row in sheet['A1':'C4']:
val = 0
alpha = ord('A')
chr(alpha)
for cell in row:
val += 1
cell.value = val
cell.font = myFont
'''
Sets the row segment (Area A5 to C5) to the sum of the cells in its column
ord('A') returns the ascii integer value for 'A' --> 65
chr(65) returns character value for ascii integer value 65 --> 'A'
Set alpha in the loop to increment by 1 and then use chr(alpha) to move from
column A to B to C. This loop accomplishes the same task as doing this:
sheet['A5'] = '=SUM(A1:A4)'
sheet['B5'] = '=SUM(B1:B4)'
sheet['C5'] = '=SUM(C1:C4)'
'''
number = 5
i = 0
for row in sheet['A5':'C5']:
for cell in row:
alpha = ord('A') + i
alphaNum = '%s%s' %(chr(alpha), number)
print('%s %s %s' %(alphaNum, alpha, i))
sheet[alphaNum] = '=SUM(%s1:%s4)' %(chr(alpha), chr(alpha))
i += 1
wb.save('formulas.xlsx')
wb.close()
from openpyxl.styles import Font
from openpyxl.styles.colors import Color
'''
Adapted from https://automatetheboringstuff.com/2e/chapter13/
Data files used can be found at this link
Formulas
========
Excel formulas can configure cells to contain values calculated from other cells.
>>> sheet['B9'] = '=SUM(B1:B8)'
This will store =SUM(B1:B8) as the value in cell B9.
'''
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
print(os.getcwd())
wb = openpyxl.Workbook() # creates a WB with a single default sheet called 'Sheet'
wb.create_sheet(index=0, title='S1')
wb.create_sheet(index=1, title='S2')
wb.create_sheet(index=2, title='S3')
# Deleting default worksheet
try:
del wb['Sheet']
print('Removed Worksheet "Sheet"')
except KeyError:
print('Worksheet "Sheet" does not exist')
print(wb.sheetnames)
myFont = Font(name='Arial', color='FF0000', size=14, bold=True)
# Set sheet 'S3' as active worksheet
wb.active = wb['S3']
print(wb.active)
# Manipulate sheet 'S3'
sheet = wb.active
sheet['A1'] = 200
sheet['A2'] = 300
# Set the formula.
sheet['A3'] = '=SUM(A1:A2)'
print(sheet['A3'].value)
# Set sheet 'S2' as active worksheet
wb.active = wb['S2']
print(wb.active)
sheet = wb.active
'''
# Set the Area's cell values using two for loops:
# The outer for goes over each row in the slice;
# The nested for loop goes through each cell in that row,
# sets the font to myFont
# and sets the value to val
'''
for row in sheet['A1':'C4']:
val = 0
alpha = ord('A')
chr(alpha)
for cell in row:
val += 1
cell.value = val
cell.font = myFont
'''
Sets the row segment (Area A5 to C5) to the sum of the cells in its column
ord('A') returns the ascii integer value for 'A' --> 65
chr(65) returns character value for ascii integer value 65 --> 'A'
Set alpha in the loop to increment by 1 and then use chr(alpha) to move from
column A to B to C. This loop accomplishes the same task as doing this:
sheet['A5'] = '=SUM(A1:A4)'
sheet['B5'] = '=SUM(B1:B4)'
sheet['C5'] = '=SUM(C1:C4)'
'''
number = 5
i = 0
for row in sheet['A5':'C5']:
for cell in row:
alpha = ord('A') + i
alphaNum = '%s%s' %(chr(alpha), number)
print('%s %s %s' %(alphaNum, alpha, i))
sheet[alphaNum] = '=SUM(%s1:%s4)' %(chr(alpha), chr(alpha))
i += 1
wb.save('formulas.xlsx')
wb.close()
Create and save MS Excel Documents in Python
import openpyxl, pprint, os
"""
Writing Excel Documents
Creating and Saving Excel Documents
"""
### Change dir to datafiles
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
# Verify datafiles is pwd
print(os.getcwd())
### Create a blank workbook.
wb = openpyxl.Workbook()
# It starts with one sheet.
print(wb.sheetnames)
sheet = wb.active
# Change the sheet name by storing a new string in its title attribute.
sheet.title = 'Duke Today Sheet'
print(wb.sheetnames)
print(sheet.title)
### Creating and Removing Sheets
# Sheets can be added and removed with the create_sheet() method and del operator.
# Add a new sheet.
wb.create_sheet()
print(wb.sheetnames)
# The create_sheet() method returns a new Worksheet object named SheetX, which by
# default is set to be the last sheet in the workbook. Optionally, the index and
# name of the new sheet can be specified with the index and title keyword arguments.
# Create a new sheet at index 0 and index 2
wb.create_sheet(index=0, title='First Sheet')
wb.create_sheet(index=2, title='Middle Sheet')
print(wb.sheetnames)
# Deleting a worksheet
del wb['Spam Bacon Eggs Sheet']
print(wb.sheetnames)
### Writing Values to Cells
sheet = wb['Sheet']
sheet['A1'] = 'Hello, world!'
print(sheet['A1'].value)
### Save workbook
# The spreadsheet file will not be saved until you call the save() workbook method
# with a name string.
wb.save('Workbook.xlsx')
"""
Writing Excel Documents
Creating and Saving Excel Documents
"""
### Change dir to datafiles
os.chdir('/Google Drive/python_work/MyPythonScripts/datafiles')
# Verify datafiles is pwd
print(os.getcwd())
### Create a blank workbook.
wb = openpyxl.Workbook()
# It starts with one sheet.
print(wb.sheetnames)
sheet = wb.active
# Change the sheet name by storing a new string in its title attribute.
sheet.title = 'Duke Today Sheet'
print(wb.sheetnames)
print(sheet.title)
### Creating and Removing Sheets
# Sheets can be added and removed with the create_sheet() method and del operator.
# Add a new sheet.
wb.create_sheet()
print(wb.sheetnames)
# The create_sheet() method returns a new Worksheet object named SheetX, which by
# default is set to be the last sheet in the workbook. Optionally, the index and
# name of the new sheet can be specified with the index and title keyword arguments.
# Create a new sheet at index 0 and index 2
wb.create_sheet(index=0, title='First Sheet')
wb.create_sheet(index=2, title='Middle Sheet')
print(wb.sheetnames)
# Deleting a worksheet
del wb['Spam Bacon Eggs Sheet']
print(wb.sheetnames)
### Writing Values to Cells
sheet = wb['Sheet']
sheet['A1'] = 'Hello, world!'
print(sheet['A1'].value)
### Save workbook
# The spreadsheet file will not be saved until you call the save() workbook method
# with a name string.
wb.save('Workbook.xlsx')
Subscribe to:
Posts (Atom)