@inproceedings{dam_towards_2023,
  author    = {Dam, Tobias and Klausner, Lukas Daniel and Neumaier, Sebastian},
  title     = {Towards a {Critical} {Open}-{Source} {Software} {Database}},
  booktitle = {Companion {Proceedings} of the {ACM} {Web} {Conference} 2023},
  pages     = {156--159},
  year      = {2023},
  publisher = {ACM},
  address   = {Austin, TX, United States},
  isbn      = {978-1-4503-9419-2},
  doi       = {10.1145/3543873.3587336},
  copyright = {Grey OA},
  keywords  = {Department Technologie, FH SP Cyber Security, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Poster, Publikationstyp Konferenz-Paper, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, peer-reviewed},
}

@inproceedings{umbrich_towards_2015,
  author    = {Umbrich, Jürgen and Neumaier, Sebastian and Polleres, Axel},
  title     = {Towards assessing the quality evolution of {Open} {Data} portals},
  booktitle = {{ODQ2015}: {Open} {Data} {Quality}: from {Theory} to {Practice} {Workshop}},
  pages     = {5},
  year      = {2015},
  url       = {https://aic.ai.wu.ac.at/~polleres/publications/umbr-etal-2015ODQ.pdf},
  language  = {en},
  abstract  = {In this work, we present the Open Data Portal Watch project, a public framework to continuously monitor and assess the (meta-)data quality in Open Data portals. We critically discuss the objectiveness of various quality metrics. Further, we report on early findings based on 22 weekly snapshots of 90 CKAN portals and highlight interesting observations and challenges.},
  keywords  = {Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, peer-reviewed},
}

@inproceedings{neumaier_talking_2017,
  address   = {Cham},
  series    = {Lecture {Notes} in {Computer} {Science}},
  title     = {Talking {Open} {Data}},
  isbn      = {978-3-319-70407-4},
  doi       = {10/gnt2vz},
  abstract  = {Enticing users into exploring Open Data remains an important challenge for the whole Open Data paradigm.
Standard stock interfaces often used by Open Data portals are anything but inspiring even for tech-savvy users, let alone those without an articulated interest in data science. To address a broader range of citizens, we designed an open data search interface supporting natural language interactions via popular platforms like Facebook and Skype. Our data-aware chatbot answers search requests and suggests relevant open datasets, bringing fun factor and a potential of viral dissemination into Open Data exploration. The current system prototype is available for Facebook (https://m.me/OpenDataAssistant) and Skype (https://join.skype.com/bot/6db830ca-b365-44c4-9f4d-d423f728e741) users.},
  language  = {en},
  booktitle = {The {Semantic} {Web}: {ESWC} 2017 {Satellite} {Events}},
  publisher = {Springer International Publishing},
  author    = {Neumaier, Sebastian and Savenkov, Vadim and Vakulenko, Svitlana},
  editor    = {Blomqvist, Eva and Hose, Katja and Paulheim, Heiko and Ławrynowicz, Agnieszka and Ciravegna, Fabio and Hartig, Olaf},
  year      = {2017},
  keywords  = {Forschungsgr, Institut für IT Sicherheitsforschung, Poster, peer-reviewed},
  pages     = {132--136},
}

@phdthesis{neumaier_semantic_2019,
  author     = {Neumaier, Sebastian},
  title      = {Semantic enrichment of open data on the {Web} - or: how to build an open data knowledge graph},
  shorttitle = {Semantic enrichment of open data on the {Web} - or},
  school     = {Technische Universität Wien},
  address    = {Wien},
  type       = {Thesis},
  year       = {2019},
  url        = {https://repositum.tuwien.at/handle/20.500.12708/3239},
  urldate    = {2020-12-01},
  language   = {en},
  note       = {Accepted: 2020-06-28T06:29:01Z},
  abstract   = {In the past years Open Data has become a trend among governments to increase transparency and public engagement by opening up national, regional, and local datasets. A huge amount of datasets became available that could potentially be integrated and linked into the Web of (Linked) Data. However, with the increasing number of published resources, there are a number of concerns with regards to the quality of the data sources and the corresponding metadata, which compromise the searchability, discoverability and usability of resources. In this work, we define quality dimension and metrics, and subsequently report findings based on a continuous monitoring and quality assessment of numerous Open Data portals. Semantic Web technologies provide enhanced search functionalities and allow to explore related content across data portals. However, as our reports show, current Open Data lacks in sufficient data quality, rich/consistent descriptions, and uniform vocabularies. Having identified and measured the existing quality issues, we outline methods to restore the quality of published resources, methods to recover the semantics of tabular Open Data, and methods to extract taxonomic, spatial and temporal information. Eventually, the aim of this work is to improve the overall quality and value of Open Data and to use the extracted semantic information to build an Open Data Knowledge Graph.},
  keywords   = {Institut für IT Sicherheitsforschung},
}

@inproceedings{heil_rebotingcom_2018,
  author     = {Heil, Erich and Neumaier, Sebastian},
  editor     = {Gangemi, Aldo and Gentile, Anna Lisa and Nuzzolese, Andrea Giovanni and Rudolph, Sebastian and Maleshkova, Maria and Paulheim, Heiko and Pan, Jeff Z. and Alam, Mehwish},
  title      = {reboting.com: {Towards} {Geo}-search and {Visualization} of {Austrian} {Open} {Data}},
  shorttitle = {reboting.com},
  booktitle  = {The {Semantic} {Web}: {ESWC} 2018 {Satellite} {Events}},
  series     = {Lecture {Notes} in {Computer} {Science}},
  pages      = {105--110},
  year       = {2018},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  isbn       = {978-3-319-98192-5},
  doi        = {10/gnd9kc},
  language   = {en},
  abstract   = {Data portals mainly publish semi-structured, tabular formats which lack semantic descriptions of geo-entities and therefore, do not allow any exploration and automated visualization of these datasets. Herein, we present a framework to add geo-semantic labels, based on a constructed geo-entity knowledge graph, and a user interface to query and automatically visualize the resources from the Austrian data portals. The web-application is available at https://reboting.com/.},
  keywords   = {Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Poster, peer-reviewed},
}

@inproceedings{neumaier_multi-level_2016,
  title      = {Multi-level {Semantic} {Labelling} of {Numerical} {Values}},
  url        = {https://link.springer.com/chapter/10.1007/978-3-319-46523-4_26},
  abstract   = {With the success of Open Data a huge amount of tabular data sources became available that could potentially be mapped and linked into the Web of (Linked) Data. Most existing approaches to “semantically label” such tabular data rely on mappings of textual information to classes, properties, or instances in RDF knowledge bases in order to link – and eventually transform – tabular data into RDF. However, as we will illustrate, Open Data tables typically contain a large portion of numerical columns and/or non-textual headers; therefore solutions that solely focus on textual “cues” are only partially applicable for mapping such data sources. We propose an approach to find and rank candidates of semantic labels and context descriptions for a given bag of numerical values. To this end, we apply a hierarchical clustering over information taken from DBpedia to build a background knowledge graph of possible “semantic contexts” for bags of numerical values, over which we perform a nearest neighbour search to rank the most likely candidates. Our evaluation shows that our approach can assign fine-grained semantic labels, when there is enough supporting evidence in the background knowledge graph.
In other cases, our approach can nevertheless assign high level contexts to the data, which could potentially be used in combination with other approaches to narrow down the search space of possible labels.},
  booktitle = {The {Semantic} {Web} – {ISWC} 2016},
  publisher = {Springer},
  address   = {Cham},
  doi       = {10.1007/978-3-319-46523-4_26},
  author    = {Neumaier, Sebastian and Umbrich, Jürgen and Parreira, Josiane Xavier and Polleres, Axel},
  month     = oct,
  year      = {2016},
  keywords  = {FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, best, best-neumaier, peer-reviewed},
  pages     = {428--445},
}

@inproceedings{weber_odarchive_2020,
  author    = {Weber, Thomas and Mitöhner, Johann and Neumaier, Sebastian and Polleres, Axel},
  editor    = {Pan, Jeff Z. and Tamma, Valentina and d’Amato, Claudia and Janowicz, Krzysztof and Fu, Bo and Polleres, Axel and Seneviratne, Oshani and Kagal, Lalana},
  title     = {{ODArchive} – {Creating} an {Archive} for {Structured} {Data} from {Open} {Data} {Portals}},
  booktitle = {The {Semantic} {Web} – {ISWC} 2020},
  series    = {Lecture {Notes} in {Computer} {Science}},
  pages     = {311--327},
  year      = {2020},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-030-62466-8},
  doi       = {10/gh38b8},
  language  = {en},
  abstract  = {We present ODArchive, a large corpus of structured data collected from over 260 Open Data portals worldwide, alongside with curated, integrated metadata. Furthermore we enrich the harvested datasets by heuristic annotations using the type hierarchies in existing Knowledge Graphs. We both (i) present the underlying distributed architecture to scale up regular harvesting and monitoring changes on these portals, and (ii) make the corpus available via different APIs. Moreover, we (iii) analyse the characteristics of tabular data within the corpus. Our APIs can be used to regularly run such analyses or to reproduce experiments from the literature that have worked on static, not publicly available corpora.},
  keywords  = {Archiving, Extern, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Open data, Profiling, Reference tables, Wiss. Beitrag, best, best-neumaier, peer-reviewed},
}

@inproceedings{kubler_open_2016,
  title     = {Open {Data} {Portal} {Quality} {Comparison} using {AHP}},
  series    = {dg.o '16},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-4339-8},
  doi       = {10.1145/2912160.2912167},
  url       = {https://doi.org/10.1145/2912160.2912167},
  abstract  = {During recent years, more and more Open Data becomes available and used as part of the Open Data movement. However, there are reported issues with the quality of the metadata in data portals and the data itself. This is a serious risk that could disrupt the Open Data project, as well as e-government initiatives since the data quality needs to be managed to guarantee the reliability of e-government to the public. First quality assessment frameworks emerge to evaluate the quality for a given dataset or portal along various dimensions (e.g., information completeness). Nonetheless, a common problem with such frameworks is to provide meaningful ranking mechanisms that are able to integrate several quality dimensions and user preferences (e.g., a portal provider is likely to have different quality preferences than a portal consumer).
To address this multi-criteria decision making problem, our research work applies AHP (Analytic Hierarchy Process), which compares 146 active Open Data portals across 44 countries, powered by the CKAN software.},
  author    = {Kubler, Sylvain and Robert, Jérémy and Le Traon, Yves and Umbrich, Jürgen and Neumaier, Sebastian},
  booktitle = {Proceedings of the 17th {International} {Digital} {Government} {Research} {Conference} on {Digital} {Government} {Research}},
  pages     = {397--407},
  month     = jun,
  year      = {2016},
  publisher = {Association for Computing Machinery},
  urldate   = {2020-12-01},
  keywords  = {Analytic Hierarchy Process, Data Quality, E-government, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Multi-Criteria Decision Making, Open Data, Wiss. Beitrag, peer-reviewed},
}

@article{neumaier_geo-semantic_2018,
  author    = {Neumaier, Sebastian and Savenkov, Vadim and Polleres, Axel},
  title     = {Geo-{Semantic} {Labelling} of {Open} {Data}},
  journal   = {Procedia Computer Science},
  series    = {Proceedings of the 14th {International} {Conference} on {Semantic} {Systems} 10th – 13th of {September} 2018 {Vienna}, {Austria}},
  volume    = {137},
  pages     = {9--20},
  month     = jan,
  year      = {2018},
  issn      = {1877-0509},
  doi       = {10/gh374p},
  url       = {http://www.sciencedirect.com/science/article/pii/S1877050918316065},
  urldate   = {2020-12-01},
  language  = {en},
  abstract  = {In the past years Open Data has become a trend among governments to increase transparency and public engagement by opening up national, regional, and local datasets. However, while many of these datasets come in semi-structured file formats, they use different schemata and lack geo-references or semantically meaningful links and descriptions of the corresponding geo-entities. We aim to address this by detecting and establishing links to geo-entities in the datasets found in Open Data catalogs and their respective metadata descriptions and link them to a knowledge graph of geo-entities. This knowledge graph does not yet readily exist, though, or at least, not a single one: so, we integrate and interlink several datasets to construct our (extensible) base geo-entities knowledge graph: (i) the openly available geospatial data repository GeoNames, (ii) the map service OpenStreetMap, (iii) country-specific sets of postal codes, and (iv) the European Union’s classification system NUTS. As a second step, this base knowledge graph is used to add semantic labels to the open datasets, i.e., we heuristically disambiguate the geo-entities in CSV columns using the context of the labels and the hierarchical graph structure of our base knowledge graph. Finally, in order to interact with and retrieve the content, we index the datasets and provide a demo user interface. Currently we indexed resources from four Open Data portals, and allow search queries for geo-entities as well as full-text matches at http://data.wu.ac.at/odgraph/.},
  keywords  = {Computer Science - Artificial Intelligence, Computer Science - Databases, Computer Science - Machine Learning, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Vortrag, Wiss. Beitrag, open data, peer-reviewed, spatio-temporal knowledge graph, spatio-temporal labelling},
}

@incollection{neumaier_data_2017,
  author    = {Neumaier, Sebastian and Polleres, Axel and Steyskal, Simon and Umbrich, Jürgen},
  editor    = {Ianni, Giovambattista and Lembo, Domenico and Bertossi, Leopoldo and Faber, Wolfgang and Glimm, Birte and Gottlob, Georg and Staab, Steffen},
  title     = {Data {Integration} for {Open} {Data} on the {Web}},
  booktitle = {Reasoning {Web}. {Semantic} {Interoperability} on the {Web}: 13th {International} {Summer} {School} 2017, {London}, {UK}, {July} 7-11, 2017, {Tutorial} {Lectures}},
  series    = {Lecture {Notes} in {Computer} {Science}},
  pages     = {1--28},
  year      = {2017},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-319-61033-7},
  doi       = {10.1007/978-3-319-61033-7_1},
  url       = {https://doi.org/10.1007/978-3-319-61033-7_1},
  urldate   = {2020-12-01},
  language  = {en},
  abstract  = {In this lecture we will discuss and introduce challenges of integrating openly available Web data and how to solve them. Firstly, while we will address this topic from the viewpoint of Semantic Web research, not all data is readily available as RDF or Linked Data, so we will give an introduction to different data formats prevalent on the Web, namely, standard formats for publishing and exchanging tabular, tree-shaped, and graph data. Secondly, not all Open Data is really completely open, so we will discuss and address issues around licences, terms of usage associated with Open Data, as well as documentation of data provenance. Thirdly, we will discuss issues connected with (meta-)data quality issues associated with Open Data on the Web and how Semantic Web techniques and vocabularies can be used to describe and remedy them. Fourth, we will address issues about searchability and integration of Open Data and discuss in how far semantic search can help to overcome these. We close with briefly summarizing further issues not covered explicitly herein, such as multi-linguality, temporal aspects (archiving, evolution, temporal querying), as well as how/whether OWL and RDFS reasoning on top of integrated open data could be help.},
  keywords  = {Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Link Data, Link Open Data, Metadata Description, Open Data, Tabular Data, Vortrag, Wiss. Beitrag},
}

@article{kubler_comparison_2018,
  title     = {Comparison of metadata quality in open data portals using the {Analytic} {Hierarchy} {Process}},
  series    = {Internet {Plus} {Government}: {Advancement} of {Networking} {Technology} and {Evolution} of the {Public} {Sector}},
  volume    = {35},
  issn      = {0740-624X},
  doi       = {10/gdbpvg},
  url       = {http://www.sciencedirect.com/science/article/pii/S0740624X16301319},
  abstract  = {The quality of metadata in open data portals plays a crucial role for the success of open data. E-government, for example, have to manage accurate and complete metadata information to guarantee the reliability and foster the reputation of e-government to the public. Measuring and comparing the quality of open data is not a straightforward process because it implies to take into consideration multiple quality dimensions whose quality may vary from one another, as well as various open data stakeholders who – depending on their role/needs – may have different preferences regarding the dimensions’ importance. To address this Multi-Criteria Decision Making (MCDM) problem, and since data quality is hardly considered in existing e-government models, this paper develops an Open Data Portal Quality (ODPQ) framework that enables end-users to easily and in real-time assess/rank open data portals. From a theoretical standpoint, the Analytic Hierarchy Process (AHP) is used to integrate various data quality dimensions and end-user preferences. From a practical standpoint, the proposed framework is used to compare over 250 open data portals, powered by organizations across 43 different countries.
The findings of our study reveals that today’s organizations do not pay sufficient heed to the management of datasets, resources and associated metadata that they are currently publishing on their portal.},
  author    = {Kubler, Sylvain and Robert, Jérémy and Neumaier, Sebastian and Umbrich, Jürgen and Le Traon, Yves},
  journal   = {Government Information Quarterly},
  number    = {1},
  pages     = {13--29},
  month     = jan,
  year      = {2018},
  urldate   = {2020-12-01},
  language  = {en},
  keywords  = {Analytic Hierarchy Process, Data quality, Decision support system, E-government, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Multi-criteria decision making, Open data, Wiss. Beitrag, best, best-neumaier, peer-reviewed},
}

@article{neumaier_enabling_2019,
  title     = {Enabling {Spatio}-{Temporal} {Search} in {Open} {Data}},
  volume    = {55},
  doi       = {10/ggwb56},
  url       = {https://www.sciencedirect.com/science/article/pii/S1570826818300696},
  abstract  = {Intuitively, most datasets found on governmental Open Data portals are organized by spatio-temporal criteria, that is, single datasets provide data for a certain region, valid for a certain time period. Likewise, for many use cases (such as, for instance, data journalism and fact checking) a pre-dominant need is to scope down the relevant datasets to a particular period or region. Rich spatio-temporal annotations are therefore a crucial need to enable semantic search for (and across) Open Data portals along those dimensions, yet – to the best of our knowledge – no working solution exists.
To this end, we (i) present a scalable approach to construct a spatio-temporal knowledge graph that hierarchically structures geographical as well as temporal entities, (ii) annotate a large corpus of tabular datasets from open data portals with entities from this knowledge graph, and (iii) enable structured, spatio-temporal search and …},
  author    = {Neumaier, Sebastian and Polleres, Axel},
  journal   = {Journal of Web Semantics},
  publisher = {Elsevier},
  pages     = {21--36},
  month     = jan,
  year      = {2019},
  keywords  = {FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, best, best-neumaier, peer-reviewed},
}

@inproceedings{neumaier_lifting_2017,
  author    = {Neumaier, Sebastian and Umbrich, Jürgen and Polleres, Axel},
  title     = {Lifting {Data} {Portals} to the {Web} of {Data}},
  booktitle = {{WWW} ’17 {Workshop} on {Linked} {Data} on the {Web} ({LDOW2017})},
  pages     = {10},
  month     = apr,
  year      = {2017},
  url       = {http://ceur-ws.org/Vol-1809/article-03.pdf},
  language  = {en},
  abstract  = {Data portals are central hubs for freely available (governmental) datasets. These portals use different software frameworks to publish their data, and the metadata descriptions of these datasets come in different schemas accordingly to the framework. The present work aims at re-exposing and connecting the metadata descriptions of currently 854k datasets on 261 data portals to the Web of Linked Data by mapping and publishing their homogenized metadata in standard vocabularies such as DCAT and Schema.org. Additionally, we publish existing quality information about the datasets and further enrich their descriptions by automatically generated metadata for CSV resources. In order to make all this information traceable and trustworthy, we annotate the generated data using the W3C’s provenance vocabulary. The dataset descriptions are harvested weekly and we offer access to the archived data by providing APIs compliant to the Memento framework. All this data – a total of about 120 million triples per weekly snapshot – is queryable at the SPARQL endpoint at data.wu.ac.at/portalwatch/sparql.},
  keywords  = {FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, peer-reviewed},
}

@book{hogan_knowledge_2022,
  author    = {Hogan, Aidan and Cochez, Michael and de Melo, Gerard and Neumaier, Sebastian},
  title     = {Knowledge graphs},
  series    = {Synthesis lectures on data, semantics and knowledge},
  number    = {22},
  year      = {2022},
  publisher = {Morgan \& Claypool Publishers},
  isbn      = {978-1-63639-235-6},
  language  = {eng},
  keywords  = {Center for Artificial Intelligence, Depart Informatik und Security, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Wiss. Beitrag, best, best-neumaier},
}

@article{hogan_knowledge_2021,
  title     = {Knowledge {Graphs}},
  volume    = {54},
  articleno = {71},
  doi       = {10.1145/3447772},
  abstract  = {In this paper we provide a comprehensive introduction to knowledge graphs, which have recently garnered significant attention from both industry and academia in scenarios that require exploiting diverse, dynamic, large-scale collections of data. After a general introduction, we motivate and contrast various graph-based data models and query languages that are used for knowledge graphs. We discuss the roles of schema, identity, and context in knowledge graphs. We explain how knowledge can be represented and extracted using a combination of deductive and inductive techniques. We summarise methods for the creation, enrichment, quality assessment, refinement, and publication of knowledge graphs.
We provide an overview of prominent open knowledge graphs and enterprise knowledge graphs, their applications, and how they use the aforementioned techniques. We conclude with high-level future research directions for knowledge graphs.},
  author    = {Hogan, Aidan and Blomqvist, Eva and Cochez, Michael and d'Amato, Claudia and de Melo, Gerard and Gutierrez, Claudio and Labra Gayo, José Emilio and Kirrane, Sabrina and Neumaier, Sebastian and Polleres, Axel and Navigli, Roberto and Ngonga Ngomo, Axel-Cyrille and Rashid, Sabbir M. and Rula, Anisa and Schmelzeisen, Lukas and Sequeda, Juan and Staab, Steffen and Zimmermann, Antoine},
  journal   = {ACM Computing Surveys},
  number    = {4},
  pages     = {1--37},
  month     = jul,
  year      = {2021},
  urldate   = {2020-12-01},
  keywords  = {Computer Science - Artificial Intelligence, Computer Science - Databases, Computer Science - Machine Learning, Extern, FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, best, best-neumaier, peer-reviewed},
}

@article{neumaier_automated_2016,
  author    = {Neumaier, Sebastian and Umbrich, Jürgen and Polleres, Axel},
  title     = {Automated {Quality} {Assessment} of {Metadata} across {Open} {Data} {Portals}},
  journal   = {ACM Journal of Data and Information Quality (JDIQ)},
  volume    = {8},
  number    = {1},
  pages     = {1--29},
  month     = oct,
  year      = {2016},
  doi       = {10.1145/2964909},
  url       = {https://dl.acm.org/doi/abs/10.1145/2964909},
  abstract  = {The Open Data movement has become a driver for publicly available data on the Web. More and more data—from governments and public institutions but also from the private sector—are made available online and are mainly published in so-called Open Data portals. However, with the increasing number of published resources, there is a number of concerns with regards to the quality of the data sources and the corresponding metadata, which compromise the searchability, discoverability, and usability of resources. In order to get a more complete picture of the severity of these issues, the present work aims at developing a generic metadata quality assessment framework for various Open Data portals: We treat data portals independently from the portal software frameworks by mapping the specific metadata of three widely used portal software frameworks (CKAN, Socrata, OpenDataSoft) to the standardized Data Catalog Vocabulary metadata schema. We subsequently define several quality metrics, which can be evaluated automatically and in an efficient manner. Finally, we report findings based on monitoring a set of over 260 Open Data portals with 1.1M datasets. This includes the discussion of general quality issues, for example, the retrievability of data, and the analysis of our specific quality metrics.},
  keywords  = {FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Wiss. Beitrag, best, best-neumaier, peer-reviewed},
}

@inproceedings{portisch_challenges_2020,
  title     = {Challenges of {Linking} {Organizational} {Information} in {Open} {Government} {Data} to {Knowledge} {Graphs}},
  series    = {Lecture {Notes} in {Computer} {Science}},
  address   = {Cham},
  isbn      = {978-3-030-61244-3},
  doi       = {10/gh38b7},
  abstract  = {Open Government Data (OGD) is being published by various public administration organizations around the globe. Within the metadata of OGD data catalogs, the publishing organizations (1) are not uniquely and unambiguously identifiable and, even worse, (2) change over time, by public administration units being merged or restructured.
In order to enable fine-grained analyzes or searches on Open Government Data on the level of publishing organizations, linking those from OGD portals to publicly available knowledge graphs (KGs) such as Wikidata and DBpedia seems like an obvious solution. Still, as we show in this position paper, organization linking faces significant challenges, both in terms of available (portal) metadata and KGs in terms of data quality and completeness. We herein specifically highlight five main challenges, namely regarding (1) temporal changes in organizations and in the portal metadata, (2) lack of a base ontology for describing organizational structures and changes in public knowledge graphs, (3) metadata and KG data quality, (4) multilinguality, and (5) disambiguating public sector organizations. Based on available OGD portal metadata from the Open Data Portal Watch, we provide an in-depth analysis of these issues, make suggestions for concrete starting points on how to tackle them along with a call to the community to jointly work on these open challenges.},
  author    = {Portisch, Jan and Fallatah, Omaima and Neumaier, Sebastian and Jaradeh, Mohamad Yaser and Polleres, Axel},
  editor    = {Keet, C. Maria and Dumontier, Michel},
  booktitle = {Knowledge {Engineering} and {Knowledge} {Management}},
  pages     = {271--286},
  year      = {2020},
  publisher = {Springer International Publishing},
  language  = {en},
  keywords  = {Dataset evolution, Depart Informatik und Security, Entity linking, Extern, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Knowledge graph evolution, Knowledge graphs, Open data, Wiss. Beitrag, peer-reviewed},
}

@article{told_willensbildung_2020,
  author    = {Told, Julia and Neumaier, Sebastian},
  title     = {Willensbildung der {Kapitalgesellschafter} in absentia},
  journal   = {Wirtschaftsrechtliche Blätter},
  volume    = {34},
  number    = {7},
  year      = {2020},
  publisher = {Verlag Österreich},
  issn      = {0930-3855},
  doi       = {10/gh38b6},
  url       = {https://elibrary.verlagoesterreich.at/article/10.33196/wbl202007036101},
  urldate   = {2020-12-01},
  language  = {de},
  abstract  = {Die Abhaltung von Gesellschafterversammlungen in Kapitalgesellschaften und Genossenschaften ist infolge der COVID-19 Pandemie erschwert. Aus diesem Anlass arbeitet der Beitrag die generellen Möglichkeiten der Willensbildung der Gesellschafter ohne gleichzeitige persönliche Anwesenheit auf. Insbesondere untersucht er die durch die gesellschaftsrechtliche COVID-19 Gesetzgebung zeitlich befristet erweiterten Möglichkeiten der Abhaltung virtueller Versammlungen. Er schließt mit der Frage, inwieweit diese erweiterten Möglichkeiten de lege ferenda beibehalten werden sollten.},
  keywords  = {Depart Informatik und Security, Extern, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Wiss.
Beitrag, best, best-neumaier, peer-reviewed},
  pages     = {361--375},
}

@inproceedings{neumaier_search_2018,
  author     = {Neumaier, Sebastian and Thurnay, Lőrinc and Lampoltshammer, Thomas J. and Knap, Tomáš},
  title      = {Search, {Filter}, {Fork}, and {Link} {Open} {Data}: {The} {ADEQUATe} platform: data- and community-driven quality improvements},
  shorttitle = {Search, {Filter}, {Fork}, and {Link} {Open} {Data}},
  booktitle  = {Companion {Proceedings} of the {The} {Web} {Conference} 2018},
  series     = {{WWW} '18},
  pages      = {1523--1526},
  month      = apr,
  year       = {2018},
  publisher  = {International World Wide Web Conferences Steering Committee},
  address    = {Republic and Canton of Geneva, CHE},
  isbn       = {978-1-4503-5640-4},
  doi        = {10.1145/3184558.3191602},
  url        = {https://doi.org/10.1145/3184558.3191602},
  urldate    = {2020-12-01},
  abstract   = {The present work describes the ADEQUATe platform: a framework to monitor the quality of (Governmental) Open Data catalogs, to re-publish improved and linked versions of the datasets and their respective metadata descriptions, and to include the community in the quality improvement process. The information acquired by the linking and (meta)data improvement steps is then integrated in a semantic search engine. In the paper, we first describe the requirements of the platform, which are based on focus group interviews and a web-based survey. Second, we use these requirements to formulate the goals and show the architecture of the overall platform, and third, we showcase the potential and relevance of the platform to resolve the requirements by describing exemplary user journeys exploring the system. The platform is available at: https://www.adequate.at/},
  keywords   = {Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Wiss. Beitrag, community involvement, data quality, linkage, open data, peer-reviewed, semantic search},
}

@inproceedings{thurnay_adequate_2019,
  title      = {{ADEQUATe}: {A} {Community}-{Driven} {Approach} to {Improve} {Open} {Data} {Quality}},
  shorttitle = {{ADEQUATe}},
  booktitle  = {Business {Information} {Systems} {Workshops}},
  series     = {Lecture {Notes} in {Business} {Information} {Processing}},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  isbn       = {978-3-030-04849-5},
  doi        = {10/gnt2vx},
  language   = {en},
  abstract   = {This paper introduces the ADEQUATe project—a platform to improve the quality of open data in a community-driven fashion. First, the context of the project is discussed: the issue of quality of open data, its relevance in Austria and how ADEQUATe attempts to tackle these matters. Then the main components of the project are introduced, outlining how they support the goals of the project: Portal Watch managing monitoring, quality assessment and enhancement of data, the ADEQUATe Knowledge Base providing the backbone to the search and semantic enrichment components, the faceted Search functionality, Dataset profiles presenting an enriched overview of individual datasets to users, ADEQUATe’s GitLab instance providing the community dimension to the portal, and Odalic, a tool for semantic interpretation of tabular data. The paper is concluded with an outlook to the benefits of the project: easier data discovery, increased insight to data evolution, community engagement leading to contribution by a wider part of the population, increased transparency and democratization as well as positive feedback loops with data maintainers, public administration and the private sector.},
  author     = {Thurnay, Lőrinc and Lampoltshammer, Thomas J.
and Neumaier, Sebastian and Knap, Tomáš}, editor = {Abramowicz, Witold and Paschke, Adrian}, year = {2019}, keywords = {Community engagement, Depart Informatik und Security, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Linked data, Open Governmental Data, Open data portal, SP IT Sec Applied Security \& Data Science, Semantic web, Wiss. Beitrag, peer-reviewed}, pages = {555--565}, } @article{priebe_von_2022, title = {Von {Data} {Warehouse} bis {Data} {Mesh}: {Ein} {Wegweiser} durch den {Dschungel} analytischer {Datenarchitekturen}}, url = {https://www.sigs-datacom.de/digital/bi-spektrum/}, number = {4/2022}, journal = {BI-SPEKTRUM}, author = {Priebe, Torsten and Neumaier, Sebastian and Markus, Stefan}, month = oct, year = {2022}, keywords = {Center for Artificial Intelligence, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, peer-reviewed, ⛔ No DOI found}, } @book{dimou_towards_2022, address = {Washington}, series = {Studies on the semantic web / ssw}, title = {Towards a knowledge-aware ai}, copyright = {CC BY}, isbn = {978-1-64368-320-1}, url = {https://ebooks.iospress.nl/volume/towards-a-knowledge-aware-ai-semantics-2022-proceedings-of-the-18th-international-conference-on-semantic-systems-1315-september-2022-vienna-austria}, abstract = {"Semantic systems lie at the heart of modern computing, interlinking with areas as diverse as AI, data science, knowledge discovery and management, big data analytics, e-commerce, enterprise search, technical documentation, document management, business intelligence, enterprise vocabulary management, machine learning, logic programming, content engineering, social computing, and the Semantic Web. This book presents the proceedings of SEMANTiCS 2022, the 18th International Conference on Semantic Systems, held as a hybrid event - live in Vienna, Austria and online - from 12 - 15 September 2022. 
The SEMANTiCS conference is an annual meeting place for the professionals and researchers who make semantic computing work, who understand its benefits and encounter its limitations, and is attended by information managers, IT architects, software engineers, and researchers from organizations ranging from research facilities and NPOs, through public administrations to the largest companies in the world. The theme and subtitle of the 2022 conference was Towards A Knowledge-Aware AI, and the book contains 15 papers, selected on the basis of quality, impact and scientific merit following a rigorous review process which resulted in an acceptance rate of 29\%. The book is divided into four chapters: semantics in data quality, standards and protection; representation learning and reasoning for downstream AI tasks; ontology development; and learning over complementary knowledge. Providing an overview of emerging trends and topics in the wide area of semantic computing, the book will be of interest to anyone involved in the development and deployment of computer technology and AI systems"--}, number = {55}, publisher = {IOS Press}, editor = {Dimou, Anastasia and Neumaier, Sebastian and Pellegrini, Tassilo and Vahdati, Sahar}, year = {2022}, keywords = {!!Nicht verwenden!! - Department Digital Business \& Innovation, Department Digital Business and Innovation, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Institute for Innovation Systems, Open Access, best, peer-reviewed, wiss. 
Beitrag}, } @inproceedings{neumaier_towards_2021, address = {Barcelona}, title = {Towards an {Architecture} for {Policy}-{Aware} {Decentral} {Dataset} {Exchange}}, isbn = {978-1-61208-888-4}, url = {https://www.thinkmind.org/index.php?view=article&articleid=semapro_2021_1_40_30020}, booktitle = {{SEMAPRO} 2021, {The} {Fifteenth} {International} {Conference} on {Advances} in {Semantic} {Processing}}, publisher = {IARIA}, author = {Neumaier, Sebastian and Havur, Giray and Pellegrini, Tassilo}, year = {2021}, keywords = {!!Nicht verwenden!! - Department Digital Business \& Innovation, Department Digital Business and Innovation, FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Institute for Innovation Systems, SP IT Sec Applied Security \& Data Science, Vortrag, best, peer-reviewed}, } @inproceedings{havur_dalicc_2021, address = {Amsterdam and Online, September}, series = {{CEUR} {Workshop} {Proceedings}}, title = {{DALICC} {As} {A} {Service} - {A} {Scaleable} {Architecture} for {License} {Clearance}}, volume = {2941}, url = {http://ceur-ws.org/Vol-2941/#paper12}, language = {en}, urldate = {2021-09-14}, booktitle = {Joint {Proceedings} of the {Semantics} co-located events: {Poster}\&{Demo} track and {Workshop} on {Ontology}-{Driven} {Conceptual} {Modelling} of {Digital} {Twins}}, publisher = {CEUR}, author = {Havur, Giray and Neumaier, Sebastian and Pellegrini, Tassilo}, editor = {Tiddi, Ilaria and Maleshkova, Maria and Pellegrini, Tassilo and Boer, Victor de}, month = sep, year = {2021}, note = {ISSN: 1613-0073}, keywords = {!!Nicht verwenden!! - Department Digital Business \& Innovation, DALICC, Department Digital Business and Innovation, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, Institute for Innovation Systems, Open Access, Vortrag, Wiss. 
Beitrag, legaltech, peer-reviewed}, } @inproceedings{priebe_finding_2021, address = {Orlando, FL, USA}, title = {Finding {Your} {Way} {Through} the {Jungle} of {Big} {Data} {Architectures}}, url = {https://ieeexplore.ieee.org/document/9671862}, doi = {10/gn7mtm}, abstract = {This paper presents a systematic review of common analytical data architectures based on DAMA-DMBOK and ArchiMate. The paper is work in progress and provides a first view on Gartner’s Logical Data Warehouse paradigm, Data Fabric and Dehghani’s Data Mesh proposal as well as their interdependencies. It furthermore sketches the way forward how this work can be extended by covering more architecture paradigms (incl. classic Data Warehouse, Data Vault, Data Lake, Lambda and Kappa architectures) and introducing a template with among others "context", "problem" and "solution" descriptions, leading ultimately to a pattern system providing guidance for choosing the right architecture paradigm for the right situation.}, publisher = {IEEE}, author = {Priebe, Torsten and Neumaier, Sebastian and Markus, Stefan}, year = {2021}, keywords = {FH SP Data Analytics \& Visual Computing, Forschungsgruppe Data Intelligence, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science, Vortrag, Wiss. Beitrag, best, peer-reviewed}, } @phdthesis{neumaier_open_2015, address = {Vienna}, type = {Thesis}, title = {Open data quality; assessment and evolution of (meta-)data quality in the open data landscape}, url = {https://repositum.tuwien.at/handle/20.500.12708/4096}, abstract = {Die Open-Data-Bewegung erfreut sich wachsender Beliebtheit unter Regierungen und öffentlichen Institutionen, aber auch in der Privatwirtschaft und unter Privatpersonen, und gewinnt so immer mehr Unterstützerinnen und Unterstützer aus all diesen Sektoren. Gleichzeitig melden sich aber auch vermehrt kritische Stimmen zu Wort. 
Hauptsorge ist die niedrige Metadaten-Qualität in Open Data Portalen, die eine Beeinträchtigung der Suche und der Auffindbarkeit von Ressourcen mit sich bringt. Bis jetzt konnte diese Sorge jedoch nicht belegt werden, da es noch keinen umfassenden und objektiven Bericht über die wirkliche Qualität von Open Data Portalen gibt. Um so einen Bericht erstellen zu können, wird ein Framework benötigt, welches die Portale über einen längeren Zeitraum hinweg beobachtet und so die Entwicklung und das Wachstum von Open Data abschätzen kann. Die vorliegende Diplomarbeit hat das Ziel diese Qualitätsprobleme in Open Data Portalen zu untersuchen. Dazu wird ein Monitoring Framework vorgestellt, welches in regelmäßigen Abständen die Metadaten von 126 CKAN Portalen speichert und deren Qualität bewertet. Die Arbeit stellt die dazu notwendigen Qualitätsmetriken vor, diskutiert den Aufbau des Monitoring Frameworks und präsentiert Erkenntnisse und Resultate, die aus dem Monitoring der Portale gewonnen werden konnten. Dazu werden Auswertungen der eingeführten Qualitätsmetriken prästeniert, die auf Qualitätsprobleme in den untersuchten Datenportalen hinweisen. Konkret konnte unter anderem ein schnelles Wachstum von diversen Open Data Portalen und eine hohe Heterogenität bezüglich der Datenformate und Lizenzen beobachtet werden. Darüberhinaus wird in dieser Arbeit ein Ansatz zur Homogenisierung von Metadaten von unterschiedlichen Datenportalen vorgestellt: Dazu wird ein Mapping vorgestellt, welches die Metadaten von CKAN, Socrata und OpenDataoft Portalen auf ein gemeinsames Schema bringt und damit die Portale vergleichbar und integrierbar macht.}, language = {en}, urldate = {2020-12-01}, school = {Vienna University of Technology}, author = {Neumaier, Sebastian}, year = {2015}, note = {Accepted: 2020-06-28T15:15:55Z}, keywords = {FH SP Data Analytics \& Visual Computing, Institut für IT Sicherheitsforschung, SP IT Sec Applied Security \& Data Science}, }