Interacting with AWS Glue
In [1]:
import boto3
In [2]:
# Display the installed boto3 version as this cell's output.
boto3.__version__
Out[2]:
In [3]:
def get_databases(client=None):
    """
    Returns the names of all databases available in the Glue data catalog.

    The ``get_databases`` paginator is used so that every result page is
    read; a single ``get_databases()`` call only returns the first page.

    :param client: Glue client to query; when omitted, the notebook-level
        ``glue_client`` is used
    :return: list of database names
    """
    if client is None:
        # Looked up at call time, so this function may be defined before the
        # cell that creates ``glue_client`` has run.
        client = glue_client
    paginator = client.get_paginator("get_databases")
    return [
        database["Name"]
        for page in paginator.paginate()
        for database in page["DatabaseList"]
    ]
In [4]:
def get_tables_for_database(database, client=None):
    """
    Returns a list of tables in a Glue database catalog.

    :param database: name of the Glue database
    :param client: Glue client to query; when omitted, the notebook-level
        ``glue_client`` is used
    :return: list of dicts, one per table, each of the form
        ``{"name": <table name>}``
    """
    if client is None:
        # Looked up at call time, so this function may be defined before the
        # cell that creates ``glue_client`` has run.
        client = glue_client
    # The paginator follows NextToken on its own, so iterating it once visits
    # every page; no manual token bookkeeping is needed.
    paginator = client.get_paginator("get_tables")
    pages = paginator.paginate(
        DatabaseName=database,
        PaginationConfig={"PageSize": 100},
    )
    return [
        {"name": table["Name"]}
        for page in pages
        for table in page["TableList"]
    ]
Set up the Glue client with `boto3`:
In [5]:
# Glue client bound to the eu-west-1 region.
glue_client = boto3.client(service_name='glue', region_name='eu-west-1')
Create two tables in the `default` database:
In [6]:
# Request for the first table; only the table name is specified.
params = {'DatabaseName': 'default', 'TableInput': {'Name': 'table_one'}}
glue_client.create_table(**params)

# Reuse the same request for the second table, changing only its name.
params['TableInput']['Name'] = 'table_two'
glue_client.create_table(**params)
Out[6]:
List the tables of every database whose name contains the string `default`:
In [7]:
# Walk only the databases whose name contains 'default' and print their tables.
for database in (name for name in get_databases() if 'default' in name):
    print(f"Database: {database}")
    for table in get_tables_for_database(database):
        print(f"Table: {table['name']}")
Clean-up:
In [8]:
# Delete request for the first table.
params = {'DatabaseName': 'default', 'Name': 'table_one'}
glue_client.delete_table(**params)

# Same request, pointed at the second table.
params['Name'] = 'table_two'
glue_client.delete_table(**params)
Out[8]:
Verification:
In [9]:
# Print the tables of the 'default'-named databases again.
for database in (name for name in get_databases() if 'default' in name):
    print(f"Database: {database}")
    for table in get_tables_for_database(database):
        print(f"Table: {table['name']}")