Source code for khorosjx.utils.df_utils
# -*- coding: utf-8 -*-
"""
:Module: khorosjx.utils.df_utils
:Synopsis: Useful tools and utilities to assist in importing, manipulating and exporting pandas dataframes
:Usage: ``from khorosjx import df_utils``
:Example: TBD
:Created By: Jeff Shurtliff
:Last Modified: Jeff Shurtliff
:Modified Date: 18 Dec 2019
"""
import pandas as pd
# Define function to convert a list of dictionaries to a pandas dataframe
[docs]def convert_dict_list_to_dataframe(dict_list, column_names=[]):
"""This function converts a list of dictionaries into a pandas dataframe.
:param dict_list: List of dictionaries
:type dict_list: list
:param column_names: The column names for the dataframe (Optional)
:type column_names: list
:returns: A pandas dataframe of the data
"""
# Identify the dataframe column names
if len(column_names) == 0:
for field_name in dict_list[0].keys():
column_names.append(field_name)
# Identify the data for each column
df_data = []
for idx in range(0, len(dict_list)):
row_data = []
for field_value in dict_list[idx].values():
row_data.append(field_value)
df_data.append(row_data)
# Create and return the dataframe
dataframe = pd.DataFrame(df_data, columns=column_names)
return dataframe
[docs]def import_csv(file_path, delimiter=",", column_names=[], columns_to_return=[], has_headers=True):
"""This function imports a CSV file to generate a dataframe.
:param file_path: The absolute path to the CSV file to be imported
:type file_path: str
:param delimiter: The column delimiter utilized in the CSV
:type delimiter: str
:param column_names: The column names to use with the imported dataframe (Optional)
:type column_names: list
:param columns_to_return: Determines which of the columns should actually be returned (Default: all columns)
:param has_headers: Defines whether or not the data in the file has column headers (Default: ``True``)
:type has_headers: bool
:returns: The imported data as a pandas dataframe
:raises: FileNotFoundError, TypeError
"""
# Determine the appropriate use case and then import and return the dataframe
if has_headers is False and len(column_names) == 0:
if len(columns_to_return) == 0: # Use Case: Headless
dataframe = pd.read_csv(file_path, sep=delimiter, header=None)
else: # Use Case: Headless Filtered
dataframe = pd.read_csv(file_path, sep=delimiter, header=None, usecols=columns_to_return)
else:
if len(column_names) > 0 and len(columns_to_return) > 0: # Use Case: Custom Filtered
dataframe = pd.read_csv(file_path, sep=delimiter, names=column_names)
dataframe = dataframe[columns_to_return]
elif len(column_names) > 0 and len(columns_to_return) == 0: # Use Case: Custom
dataframe = pd.read_csv(file_path, sep=delimiter, names=column_names)
elif len(column_names) == 0 and len(columns_to_return) > 0: # Use Case: Filtered
dataframe = pd.read_csv(file_path, sep=delimiter, usecols=columns_to_return)
else: # Use Case: Default
dataframe = pd.read_csv(file_path, sep=delimiter)
return dataframe
[docs]def import_excel(file_path, excel_sheet='', use_first_sheet=False,
column_names=[], columns_to_return=[], has_headers=True):
"""This function imports a Microsoft Excel file to generate a dataframe.
:param file_path: The absolute path to the Excel file to be imported
:type file_path: str
:param excel_sheet: The name of the specific sheet in the file to import
:type excel_sheet: str
:param use_first_sheet: Defines whether or not the first sheet in the file should be used (Default: ``False``)
:type use_first_sheet: bool
:param column_names: The column names to use with the imported dataframe (Optional)
:type column_names: list
:param columns_to_return: Determines which of the columns should actually be returned (Default: all columns)
:param has_headers: Defines whether or not the data in the file has column headers (Default: ``True``)
:type has_headers: bool
:returns: The imported data as a pandas dataframe
:raises: FileNotFoundError, TypeError
"""
# Determine the appropriate use case and then import and return the dataframe
if excel_sheet != "" and use_first_sheet is False:
if has_headers is False and len(column_names) == 0:
if columns_to_return == 0: # Use Case: Headless
excel_data = pd.read_excel(file_path, sheet_name=excel_sheet, header=None)
else: # Use Case: Headless Filtered
excel_data = pd.read_excel(file_path, sheet_name=excel_sheet, header=None, usecols=columns_to_return)
else:
if len(column_names) > 0 and len(columns_to_return) > 0: # Use Case: Custom Filtered
excel_data = pd.read_excel(file_path, names=column_names)
excel_data = excel_data[columns_to_return]
elif len(column_names) > 0 and len(columns_to_return) == 0: # Use Case: Custom
excel_data = pd.read_excel(file_path, names=column_names)
elif len(column_names) == 0 and len(columns_to_return) > 0: # Use Case: Filtered
excel_data = pd.read_excel(file_path, usecols=columns_to_return)
else: # Use Case: Default
excel_data = pd.read_excel(file_path)
if use_first_sheet: # Use Case: Use First Sheet
excel_data = excel_data[0]
return excel_data