"""Parse component classes from a TCX XML file into pandas DataFrames."""

import xml.etree.ElementTree as ET

import pandas as pd


def _child_text(element, tag):
    """Return the text of the first ``tag`` child of ``element``, or None if absent."""
    child = element.find(tag)
    return None if child is None else child.text


def series(cols, rows):
    """Turn row-oriented data into a dict of column name -> ``pd.Series``.

    Args:
        cols: Column names; position ``i`` names the ``i``-th cell of each row.
        rows: Sequence of rows, each an indexable sequence of cell values.

    Returns:
        A dict mapping every name in ``cols`` to a Series holding that
        column's values in row order. A row that is shorter than ``cols``
        contributes ``None`` for its missing cells; cells beyond
        ``len(cols)`` are ignored. With no rows, every Series is empty.
    """
    # An explicit dtype for the empty case avoids relying on pandas' default
    # dtype for empty Series; non-empty columns use normal inference.
    dtype = None if rows else object
    columns = {}
    for idx, name in enumerate(cols):
        # Build each column exactly once instead of re-transposing all rows
        # for every column.
        cells = [row[idx] if idx < len(row) else None for row in rows]
        columns[name] = pd.Series(cells, dtype=dtype)
    return columns


def createDataframe(dat):
    """Build a ``pd.DataFrame`` from ``dat`` (e.g. the dict from ``series``)."""
    return pd.DataFrame(dat)


def tcxDataFrameColumns(cls):
    """Return the column names for a component-class element.

    The first column is always ``"Name"``; the rest are the ``name`` texts of
    the ``features/feature`` children, in document order. A feature without a
    ``name`` child yields ``None`` as its column name.
    """
    cols = ["Name"]
    cols.extend(
        _child_text(feature, "name")
        for feature in cls.findall("features/feature")
    )
    return cols


def tcxDataFrameRows(comp):
    """Return one data row for a component element.

    The row starts with the component's ``name`` text, followed by one cell
    per ``feature-values/feature-value`` child:

    * several values  -> a list of their texts,
    * a single value  -> that text,
    * no value at all -> ``None`` (previously this raised ``IndexError``).

    Plain ``value`` children take precedence; only when there are none are
    ``int-range/integer`` and ``float-range/float`` children collected.
    """
    row = [_child_text(comp, "name")]
    for fv in comp.findall("feature-values/feature-value"):
        vals = [v.text for v in fv.findall("value")]
        if not vals:
            # NOTE(review): the original indentation was lost; the float-range
            # lookup is assumed to belong to this fallback branch together
            # with the int-range lookup -- confirm against real data.
            vals = [v.text for v in fv.findall("int-range/integer")]
            vals.extend(v.text for v in fv.findall("float-range/float"))
        if len(vals) > 1:
            row.append(vals)
        elif vals:
            row.append(vals[0])
        else:
            # Keep the row aligned with the columns even when the
            # feature-value carries no data.
            row.append(None)
    return row


def tcx_parse(tcx):
    """Parse a TCX XML document into one DataFrame per component class.

    Args:
        tcx: Anything ``xml.etree.ElementTree.parse`` accepts (a file name or
            a file object).

    Returns:
        A list of DataFrames, one for each
        ``model/component-classes/component-class`` element that has at least
        one ``components/component`` child. Each frame gets a ``name``
        attribute holding the class's ``name`` text, which is also printed.
    """
    dataframes = []
    root = ET.parse(tcx).getroot()
    for cc in root.findall("./model/component-classes/component-class"):
        columns = tcxDataFrameColumns(cc)
        data = [
            tcxDataFrameRows(cv)
            for cv in cc.findall("components/component")
        ]
        if not data:
            # Classes without components produce no frame.
            continue
        dat = createDataframe(series(columns, data))
        # Plain Python attribute on the frame, kept for existing callers.
        dat.name = _child_text(cc, "name")
        dataframes.append(dat)
        print(dat.name)
    return dataframes


if __name__ == "__main__":
    dataframes = tcx_parse("test-data/sDecanter_main.tcx")
    for d in dataframes:
        print(d)