% Wright & Zheng (2020), journal article. The abstract is stored as plain
% text with one sentence per line; it contains no HTML or LaTeX markup.
@article{Wright_Zheng_2020,
  author       = {Wright, Matthew and Zheng, Xiaojun},
  title        = {Topological Data Analysis on {Simple English Wikipedia} Articles},
  journal      = {The {PUMP} Journal of Undergraduate Research},
  volume       = {3},
  year         = {2020},
  month        = dec,
  pages        = {308--328},
  url          = {https://journals.calstate.edu/pump/article/view/2410},
  abstractNote = {Single-parameter persistent homology, a key tool in topological data analysis, has been widely applied to data problems along with statistical techniques that quantify the significance of the results.
    In contrast, statistical techniques for two-parameter persistence, while highly desirable for real-world applications, have scarcely been considered.
    We present three statistical approaches for comparing geometric data using two-parameter persistent homology; these approaches rely on the Hilbert function, matching distance, and barcodes obtained from two-parameter persistence modules computed from the point-cloud data.
    Our statistical methods are broadly applicable for analysis of geometric data indexed by a real-valued parameter.
    We apply these approaches to analyze high-dimensional point-cloud data obtained from Simple English Wikipedia articles.
    In particular, we show how our methods can be utilized to distinguish certain subsets of the Wikipedia data and to compare with random data.
    These results yield insights into the construction of null distributions and stability of our methods with respect to noisy data.},
}