Source code for solidipes_core_plugin.loaders.table

import re

import pandas as pd
from solidipes.loaders.file import File
from solidipes.utils import solidipes_logging as logging
from solidipes.validators.validator import validator

logger = logging.getLogger()


[docs] class Table(File): """Table file loaded with Pandas""" from ..viewers.table import Table as TableViewer supported_mime_types = { "text/csv": "csv", "text/ssv": "ssv", "text/plain": ["csv", "ssv"], "application/vnd.ms-excel": ["csv", "xlsx"], "application/numpy/array": "npy", "application/octet-stream": "npy", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx", } _compatible_viewers = [TableViewer] def __init__(self, **kwargs): super().__init__(**kwargs) path = kwargs["path"] # find loader matching file extension if self.file_info.type == "text/csv" or self.file_info.extension in ["csv"]: self.pandas_loader = self.read_csv elif self.file_info.type == "text/ssv" or self.file_info.extension in ["ssv"]: self.pandas_loader = self.read_ssv elif self.file_info.type == "text/plain": self.pandas_loader = pd.read_csv elif self.file_info.type == "application/vnd.ms-excel" or self.file_info.extension in ["xlsx"]: self.pandas_loader = pd.read_excel elif self.file_info.extension.startswith("application/numpy") or self.file_info.extension == "npy": self.pandas_loader = self.read_numpy else: raise RuntimeError(f"File type not supported: {path} {self.file_info.type} {self.file_info.extension}")
[docs] def read_csv(self, fname, **kwargs): ret = pd.read_csv(fname, sep=r",", **kwargs) return ret
[docs] def read_ssv(self, fname, **kwargs): ret = pd.read_csv(fname, sep=r"\s+", **kwargs) return ret
[docs] def read_numpy(self, fname, **kwargs): import numpy f = numpy.load(fname) f = pd.DataFrame(f) return f
@validator(description="Table has valid header") def _has_valid_header(self) -> bool: for h in self.header_list: try: h = float(h) self.add_validation_error([f"Incorrect header: {self.header_list}"]) return False except Exception: pass if h.startswith("Unnamed"): self.add_validation_error([f"Incorrect header: {self.header_list}"]) return False return True @File.loadable def header_list(self): data = self.pandas_loader(self.file_info.path, nrows=0) logger.debug(data) logger.debug(data.columns) return [str(e) for e in data.columns] @File.loadable def header(self) -> str: return ", ".join(self.header_list) @File.rocrate_metadata def column_names(self) -> list[str]: """Return column names without units or other annotations in brackets or braces.""" return [re.sub(r"\s*[\[\(].*[\]\)]\s*$", "", h).strip() for h in self.header_list] @File.rocrate_metadata def column_units(self) -> list[str]: """Return units or other annotations in brackets or braces, or an empty string if not present.""" pattern = re.compile(r".*[\[\(](.*)[\]\)]\s*$") return [(m.group(1).strip() if (m := pattern.match(h)) else "") for h in self.header_list] @File.loadable def table(self): return self.pandas_loader(self.file_info.path)