Source code for inetsix.excel

"""Python class to serialize Excel spreadsheet to Python structures

Allow to extract data from an Excel sheet to convert it to different 
This class supports import from different tab from Excel file and can serialize both: table and list

"""

__license__ = "GPL"
__docformat__ = 'reStructuredText'


import os
from optparse import OptionParser
import pprint
import sys
import json
import openpyxl
import itertools


[docs]class ExcelSerializer: """Convert Excel data into python structures. Allow to extract data from an Excel sheet to convert it to different This class supports import from different tab from Excel file and can serialize both: table and list :Example: Output example for a table >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialize_table(sheet="Topology", nb_columns=6) >>> print table.get_data(sheet="Topology") [ { 'id': '1', u'local_device': u'dev1', u'local_port': u'port1', u'local_port_name': u'et-0/0/0', u'remote_device': u'dev2', u'remote_port': u'port1', u'remote_port_name': u'et-0/0/0'}, { 'id': '2', u'local_device': u'dev1', u'local_port': u'port2', u'local_port_name': u'et-0/0/1', u'remote_device': u'dev3', u'remote_port': u'port1', u'remote_port_name': u'et-0/0/1'}, { 'id': '3', u'local_device': u'dev2', u'local_port': u'port3', u'local_port_name': u'et-0/0/2', u'remote_device': u'dev4', u'remote_port': u'port1', u'remote_port_name': u'et-0/0/2'}] :Example: Output example for a list >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialized_list(sheet="List") >>> print table.get_data(sheet="List") { 'asn_base': '65000', 'bgp_export': 'underlay-export', 'bgp_group': 'underlay', 'bgp_import': 'underlay-import', 'mtu_phy_int': ['9200', '1000'] } .. todo:: Create a method to locate data in Excel Sheet. """ def __init__(self, excel_path = None, sheet="Sheet1", has_header=True, nb_columns=0): """Class constructor. Create a new ExcelSeiralizer object from an Excel file and a sheet :param excel_path: Path to excel file you want to read data (Optional). :type excel_path: str. :param sheet: Tab name where data are located. (Default is Sheet1) :type sheet: str. :param has_header: Boolean to enable header finding. :type has_header: bool. :param nb_columns: Number of columns to look for data :type nb_columns: int. """ self.__yaml=dict() self.__filename = excel_path
[docs] def serialize_table(self, sheet=None, nb_columns=2, has_header=True): """Serialize Excel file into python structure. Open Excel file defined in ``self.__filename`` and look for data Because columns are using letters instead of numbers, it is required to convert decimal values to alphabet letters. For every line, function is reading every cell and move them to a ``dict`` entry Once line is completely read, ``dict`` is appended to a list of all lines. :Example: >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialize_table(sheet="Topology", nb_columns=6) .. note:: Values are also saved in ``self._data_array`` for later use. :param sheet: Sheet name to read :type sheet: str. :param nb_columns: Number of column to read :type nb_columns: int. :param has_header: Boolean to enable header finding. :type has_header: bool. :returns: YAML compatible list of all extracted lines :rtype: list """ wb = openpyxl.load_workbook(self.__filename) ws = wb[sheet] _data_array=[] _header = self.__find_header(sheet=sheet, nb_columns=nb_columns) # because Excel columns are alphabet letters instead of number, # we have to convert decimal to letter. # First letter 'a' is identify with code 97 # Max column is 97(a) + nb_columns -1 col_max = chr(97-1+nb_columns) cell_range = 'a{}:{}{}'.format(ws.min_row+1 , col_max, ws.max_row) row_id = 1 # Extract ROW from the range for row in ws[cell_range]: line = dict() cell_index = 0 # Read every CELL part of the row and within our scope for cell in row: # Force casting to str for all cells. Useful if excel encode a value to int or boolean if has_header is True: line[_header[cell_index]] = str(cell.value).lower() else: line[cell_index] = str(cell.value).lower() cell_index +=1 # Add ROW_ID to create unique ID line['id'] = str(row_id) # Add line to the complete dict _data_array.append(line) row_id += 1 self.__yaml[self._string_cleanup(sheet)] = _data_array return self.__yaml[self._string_cleanup(sheet)]
[docs] def serialize_list(self, sheet=None, nb_columns=2): """Serialize a list into a YAML compliant format. :Example: >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialized_list(sheet="List") .. note:: Values are also saved in ``self._data_array`` for later use. :param sheet: Sheet name to read (default=None, optional)) :type sheet: str. :param nb_columns: Number of column to read (optional) :type nb_columns: int. :returns: YAML compatible dictionnary with all extracted values from the list :rtype: dict """ wb = openpyxl.load_workbook(self.__filename) ws = wb[sheet] _data = dict() col_max = chr(97-1+nb_columns) cell_range = 'a{}:{}{}'.format(ws.min_row , col_max, ws.max_row) for row in ws[cell_range]: _data[str(row[0].value).lower()] = self._serialize_cell( xlsCellStr = str(row[1].value).lower() ) self.__yaml[self._string_cleanup(sheet)] = _data return self.__yaml[self._string_cleanup(sheet)]
def __find_header(self, sheet=None, nb_columns=2): """Look for array in the table and provide list of them. Extract table header and create a dict() to store them with a standardize name: lower and replace space by "_" :param sheet: Sheet name to read (default=None, optional)) :type sheet: str. :param nb_columns: Number of column to read (optional) :type nb_columns: int. :returns: list of headers found in given sheet :rtype: list """ header = [] wb = openpyxl.load_workbook(self.__filename) ws = wb[sheet] col_max = chr(97-1+nb_columns) cell_range = 'a{}:{}{}'.format(ws.min_row, col_max, 1) for row in ws[cell_range]: for cell in row: header.append( self._string_cleanup(cell.value) ) return header
[docs] def _serialize_cell( self, xlsCellStr=None): """Serialize a cell by doing some cleanup. Transform multi lines content to a single line str. :param xlsCellStr: content of a cell to serialize and cleanup (default=None) :type xlsCellStr: str :returns: Single line string :rtype: str """ if len(str(xlsCellStr).split('\n')) > 1: return str(xlsCellStr).split('\n') else: return xlsCellStr
[docs] def _string_cleanup( self, string ): """Cleanup a string to lower and remove space. :param string: String to cleanup :type string: str :returns: Cleanup string :rtype: str """ if isinstance(string, basestring): return string.replace(' ', '_').lower() else: return string
[docs] def get_data(self, sheet=None): """Provide a read only access to table data .. note:: This function is a wrapper to ``get_yaml()`` function :Example: >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialized_list(sheet="List") >>> print table.get_data(sheet="List") { 'asn_base': '65000', 'bgp_export': 'underlay-export', 'bgp_group': 'underlay', 'bgp_import': 'underlay-import', 'mtu_phy_int': ['9200', '1000'] } :param sheet: Sheet name to read (default=None, optional)) :type sheet: str. :returns: A YAML structure with data found in given sheet :rtype: dict """ return self.get_yaml(sheet=sheet)
[docs] def get_header(self): """Provide a read only access to header table data. :**Example**: >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialize_table(sheet="Topology", nb_columns=6) >>> print table.get_header() [ 'asn_base', 'bgp_export', 'bgp_group', 'bgp_import', 'mtu_phy_int' ] :returns: A YAML structure with headers found in given sheet :rtype: list """ return self._header
[docs] def get_yaml(self, sheet=None): """Return a per sheet Python structure compatible with YAML language :Example: >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialized_list(sheet="List") >>> print table.get_yaml(sheet="List") { 'asn_base': '65000', 'bgp_export': 'underlay-export', 'bgp_group': 'underlay', 'bgp_import': 'underlay-import', 'mtu_phy_int': ['9200', '1000'] } :param sheet: Sheet name to read (default=None, optional)) :type sheet: str. :returns: A YAML structure with data found in given sheet :rtype: dict """ if self._string_cleanup(sheet) in self.__yaml: return self.__yaml[self._string_cleanup(sheet)] else: return None
[docs] def get_yaml_all(self): """Get Yaml data. Provide a dictionary of all data found in all sheets parsed in this Excel file. :Example: >>> table = ExcelSerializer(excel_path='../excel/input.xlsx') >>> table.serialized_list(sheet="List") >>> table.serialize_table(sheet="Topology", nb_columns=6) >>> table.get_yaml_all() { 'List': { 'asn_base': '65000', 'bgp_export': 'underlay-export', 'bgp_group': 'underlay', 'bgp_import': 'underlay-import', 'mtu_phy_int': ['9200', '1000'] }, 'Topology': [ { 'id': '1', u'local_device': u'dev1', u'local_port': u'port1', u'local_port_name': u'et-0/0/0', u'remote_device': u'dev2', u'remote_port': u'port1', u'remote_port_name': u'et-0/0/0'}, { 'id': '2', u'local_device': u'dev1', u'local_port': u'port2', u'local_port_name': u'et-0/0/1', u'remote_device': u'dev3', u'remote_port': u'port1', u'remote_port_name': u'et-0/0/1'}, { 'id': '3', u'local_device': u'dev2', u'local_port': u'port3', u'local_port_name': u'et-0/0/2', u'remote_device': u'dev4', u'remote_port': u'port1', u'remote_port_name': u'et-0/0/2'}] ] } :param sheet: Sheet name to read (default=None, optional)) :type sheet: str. :returns: A YAML structure with data found in sheets parsed previously :rtype: dict """ return self.__yaml