@online{_,
title = {Measuring the Linearity of Machine Learning Models — {{Alibi}} 0.9.5dev Documentation},
url = {https://docs.seldon.io/projects/alibi/en/latest/methods/LinearityMeasure.html},
urldate = {2023-10-22}
}
@article{bareinboim_,
title = {Causal Fairness Analysis},
author = {Plecko, Drago and Bareinboim, Elias},
abstract = {Decision-making systems based on AI and machine learning have been used throughout a wide range of real-world scenarios, including healthcare, law enforcement, education, and finance. It is no longer far-fetched to envision a future where autonomous systems will drive entire business decisions and, more broadly, support large-scale decision-making infrastructure to solve society’s most challenging problems. Issues of unfairness and discrimination are pervasive when decisions are being made by humans, and remain (or are potentially amplified) when decisions are made using machines with little transparency, accountability, and fairness. In this paper, we introduce a framework for causal fairness analysis with the intent of filling in this gap, i.e., understanding, modeling, and possibly solving issues of fairness in decision-making settings. The main insight of our approach will be to link the quantification of the disparities present in the observed data with the underlying, often unobserved, collection of causal mechanisms that generate the disparity in the first place, a challenge we call the Fundamental Problem of Causal Fairness Analysis (FPCFA). In order to solve the FPCFA, we study the problem of decomposing variations and empirical measures of fairness that attribute such variations to structural mechanisms and different units of the population. Our effort culminates in the Fairness Map, the first systematic attempt to organize and explain the relationship between various criteria found in the literature. Finally, we study which causal assumptions are minimally needed for performing causal fairness analysis and propose the Fairness Cookbook, which allows one to assess the existence of disparate impact and disparate treatment.},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\WFLQJU2C\bareinboim_.pdf}
}
@article{barredoarrieta_2020,
title = {Explainable {{Artificial Intelligence}} ({{XAI}}): {{Concepts}}, Taxonomies, Opportunities and Challenges toward Responsible {{AI}}},
shorttitle = {Explainable {{Artificial Intelligence}} ({{XAI}})},
author = {Barredo Arrieta, Alejandro and Díaz-Rodríguez, Natalia and Del Ser, Javier and Bennetot, Adrien and Tabik, Siham and Barbado, Alberto and Garcia, Salvador and Gil-Lopez, Sergio and Molina, Daniel and Benjamins, Richard and Chatila, Raja and Herrera, Francisco},
date = {2020-06-01},
journaltitle = {Information Fusion},
shortjournal = {Information Fusion},
volume = {58},
pages = {82--115},
issn = {1566-2535},
doi = {10.1016/j.inffus.2019.12.012},
url = {https://www.sciencedirect.com/science/article/pii/S1566253519308103},
urldate = {2022-01-04},
abstract = {In the last few years, Artificial Intelligence (AI) has achieved a notable momentum that, if harnessed appropriately, may deliver the best of expectations over many application sectors across the field. For this to occur shortly in Machine Learning, the entire community stands in front of the barrier of explainability, an inherent problem of the latest techniques brought by sub-symbolism (e.g. ensembles or Deep Neural Networks) that were not present in the last hype of AI (namely, expert systems and rule based models). Paradigms underlying this problem fall within the so-called eXplainable AI (XAI) field, which is widely acknowledged as a crucial feature for the practical deployment of AI models. The overview presented in this article examines the existing literature and contributions already done in the field of XAI, including a prospect toward what is yet to be reached. For this purpose we summarize previous efforts made to define explainability in Machine Learning, establishing a novel definition of explainable Machine Learning that covers such prior conceptual propositions with a major focus on the audience for which the explainability is sought. Departing from this definition, we propose and discuss about a taxonomy of recent contributions related to the explainability of different Machine Learning models, including those aimed at explaining Deep Learning methods for which a second dedicated taxonomy is built and examined in detail. This critical literature analysis serves as the motivating background for a series of challenges faced by XAI, such as the interesting crossroads of data fusion and explainability. Our prospects lead toward the concept of Responsible Artificial Intelligence, namely, a methodology for the large-scale implementation of AI methods in real organizations with fairness, model explainability and accountability at its core. Our ultimate goal is to provide newcomers to the field of XAI with a thorough taxonomy that can serve as reference material in order to stimulate future research advances, but also to encourage experts and professionals from other disciplines to embrace the benefits of AI in their activity sectors, without any prior bias for its lack of interpretability.},
langid = {english},
keywords = {\_tablet,Accountability,Comprehensibility,Data Fusion,Deep Learning,Explainable Artificial Intelligence,Fairness,Interpretability,Machine Learning,Privacy,Responsible Artificial Intelligence,Transparency},
file = {C:\Users\MarcoRepetto\Zotero\storage\666EB9C3\barredoarrieta_2020.pdf}
}
@online{bellamy_2018,
title = {{{AI Fairness}} 360: {{An Extensible Toolkit}} for {{Detecting}}, {{Understanding}}, and {{Mitigating Unwanted Algorithmic Bias}}},
shorttitle = {{{AI Fairness}} 360},
author = {Bellamy, Rachel K. E. and Dey, Kuntal and Hind, Michael and Hoffman, Samuel C. and Houde, Stephanie and Kannan, Kalapriya and Lohia, Pranay and Martino, Jacquelyn and Mehta, Sameep and Mojsilovic, Aleksandra and Nagar, Seema and Ramamurthy, Karthikeyan Natesan and Richards, John and Saha, Diptikalyan and Sattigeri, Prasanna and Singh, Moninder and Varshney, Kush R. and Zhang, Yunfeng},
date = {2018-10-03},
eprint = {1810.01943},
eprinttype = {arxiv},
eprintclass = {cs},
url = {http://arxiv.org/abs/1810.01943},
urldate = {2023-09-12},
abstract = {Fairness is an increasingly important concern as machine learning models are used to support decision making in high-stakes applications such as mortgage lending, hiring, and prison sentencing. This paper introduces a new open source Python toolkit for algorithmic fairness, AI Fairness 360 (AIF360), released under an Apache v2.0 license (https://github.com/ibm/aif360). The main objectives of this toolkit are to help facilitate the transition of fairness research algorithms to use in an industrial setting and to provide a common framework for fairness researchers to share and evaluate algorithms. The package includes a comprehensive set of fairness metrics for datasets and models, explanations for these metrics, and algorithms to mitigate bias in datasets and models. It also includes an interactive Web experience (https://aif360.mybluemix.net) that provides a gentle introduction to the concepts and capabilities for line-of-business users, as well as extensive documentation, usage guidance, and industry-specific tutorials to enable data scientists and practitioners to incorporate the most appropriate tool for their problem into their work products. The architecture of the package has been engineered to conform to a standard paradigm used in data science, thereby further improving usability for practitioners. Such architectural design and abstractions enable researchers and developers to extend the toolkit with their new algorithms and improvements, and to use it for performance benchmarking. A built-in testing infrastructure maintains code quality.},
pubstate = {preprint},
keywords = {Computer Science - Artificial Intelligence},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\5E2WNVTH\\bellamy_2018.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\FI4BP2V3\\1810.html}
}
@article{benjamins_2021,
title = {A Choices Framework for the Responsible Use of {{AI}}},
author = {Benjamins, Richard},
date = {2021-02-01},
journaltitle = {AI and Ethics},
shortjournal = {AI Ethics},
volume = {1},
number = {1},
pages = {49--53},
issn = {2730-5961},
doi = {10.1007/s43681-020-00012-5},
url = {https://doi.org/10.1007/s43681-020-00012-5},
urldate = {2023-10-29},
abstract = {Popular press and media often make us believe that artificial intelligence technology is ethical or unethical by itself. In this paper, we will argue that organizations that develop or apply AI have certain choices they can make that will lead to a more or less responsible use of AI. By approaching those choices in a methodological way, organizations can make better decisions toward the ethical use of this powerful technology.},
langid = {english},
keywords = {assessment\_framework},
file = {C:\Users\MarcoRepetto\Zotero\storage\5X7PX9PX\benjamins_2021.pdf}
}
@inproceedings{biswas_2021,
title = {Fair {{Preprocessing}}: {{Towards Understanding Compositional Fairness}} of {{Data Transformers}} in {{Machine Learning Pipeline}}},
shorttitle = {Fair {{Preprocessing}}},
booktitle = {Proceedings of the 29th {{ACM Joint Meeting}} on {{European Software Engineering Conference}} and {{Symposium}} on the {{Foundations}} of {{Software Engineering}}},
author = {Biswas, Sumon and Rajan, Hridesh},
date = {2021-08-20},
eprint = {2106.06054},
eprinttype = {arxiv},
eprintclass = {cs},
pages = {981--993},
doi = {10.1145/3468264.3468536},
url = {http://arxiv.org/abs/2106.06054},
urldate = {2023-10-05},
abstract = {In recent years, many incidents have been reported where machine learning models exhibited discrimination among people based on race, sex, age, etc. Research has been conducted to measure and mitigate unfairness in machine learning models. For a machine learning task, it is a common practice to build a pipeline that includes an ordered set of data preprocessing stages followed by a classifier. However, most of the research on fairness has considered a single classifier based prediction task. What are the fairness impacts of the preprocessing stages in machine learning pipeline? Furthermore, studies showed that often the root cause of unfairness is ingrained in the data itself, rather than the model. But no research has been conducted to measure the unfairness caused by a specific transformation made in the data preprocessing stage. In this paper, we introduced the causal method of fairness to reason about the fairness impact of data preprocessing stages in ML pipeline. We leveraged existing metrics to define the fairness measures of the stages. Then we conducted a detailed fairness evaluation of the preprocessing stages in 37 pipelines collected from three different sources. Our results show that certain data transformers are causing the model to exhibit unfairness. We identified a number of fairness patterns in several categories of data transformers. Finally, we showed how the local fairness of a preprocessing stage composes in the global fairness of the pipeline. We used the fairness composition to choose appropriate downstream transformer that mitigates unfairness in the machine learning pipeline.},
keywords = {Computer Science - Machine Learning,D.2.0,I.2.5},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\L26HNFFL\\biswas_2021.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\FNEEF8F4\\2106.html}
}
@article{diaz-rodriguez_2023,
title = {Connecting the Dots in Trustworthy {{Artificial Intelligence}}: {{From AI}} Principles, Ethics, and Key Requirements to Responsible {{AI}} Systems and Regulation},
shorttitle = {Connecting the Dots in Trustworthy {{Artificial Intelligence}}},
author = {Díaz-Rodríguez, Natalia and Del Ser, Javier and Coeckelbergh, Mark and López de Prado, Marcos and Herrera-Viedma, Enrique and Herrera, Francisco},
date = {2023-11-01},
journaltitle = {Information Fusion},
shortjournal = {Information Fusion},
volume = {99},
pages = {101896},
issn = {1566-2535},
doi = {10.1016/j.inffus.2023.101896},
url = {https://www.sciencedirect.com/science/article/pii/S1566253523002129},
urldate = {2023-10-04},
abstract = {Trustworthy Artificial Intelligence (AI) is based on seven technical requirements sustained over three main pillars that should be met throughout the system’s entire life cycle: it should be (1) lawful, (2) ethical, and (3) robust, both from a technical and a social perspective. However, attaining truly trustworthy AI concerns a wider vision that comprises the trustworthiness of all processes and actors that are part of the system’s life cycle, and considers previous aspects from different lenses. A more holistic vision contemplates four essential axes: the global principles for ethical use and development of AI-based systems, a philosophical take on AI ethics, a risk-based approach to AI regulation, and the mentioned pillars and requirements. The seven requirements (human agency and oversight; robustness and safety; privacy and data governance; transparency; diversity, non-discrimination and fairness; societal and environmental wellbeing; and accountability) are analyzed from a triple perspective: What each requirement for trustworthy AI is, Why it is needed, and How each requirement can be implemented in practice. On the other hand, a practical approach to implement trustworthy AI systems allows defining the concept of responsibility of AI-based systems facing the law, through a given auditing process. Therefore, a responsible AI system is the resulting notion we introduce in this work, and a concept of utmost necessity that can be realized through auditing processes, subject to the challenges posed by the use of regulatory sandboxes. Our multidisciplinary vision of trustworthy AI culminates in a debate on the diverging views published lately about the future of AI. Our reflections in this matter conclude that regulation is a key for reaching a consensus among these views, and that trustworthy and responsible AI systems will be crucial for the present and future of our society.},
keywords = {AI ethics,AI regulation,Regulatory sandbox,Responsible AI systems,Trustworthy AI},
file = {C:\Users\MarcoRepetto\Zotero\storage\TNASL858\diaz-rodriguez_2023.pdf}
}
@report{EC2020,
title = {The Assessment List for Trustworthy Artificial Intelligence},
author = {{High-Level Expert Group on Artificial Intelligence, European Commission}},
date = {2020}
}
@book{europeancommission.jointresearchcentre._2023,
title = {Cybersecurity of Artificial Intelligence in the {{AI Act}}: Guiding Principles to Address the Cybersecurity Requirement for High Risk {{AI}} Systems.},
shorttitle = {Cybersecurity of Artificial Intelligence in the {{AI Act}}},
author = {{European Commission. Joint Research Centre.}},
date = {2023},
publisher = {{Publications Office}},
location = {{LU}},
url = {https://data.europa.eu/doi/10.2760/271009},
urldate = {2023-10-05},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\QT4JGUF3\europeancommission.jointresearchcentre._2023.pdf}
}
@book{europeancommission.jointresearchcentre._2023a,
title = {Analysis of the Preliminary {{AI}} Standardisation Work Plan in Support of the {{AI Act}}.},
author = {{European Commission. Joint Research Centre.}},
date = {2023},
publisher = {{Publications Office}},
location = {{LU}},
url = {https://data.europa.eu/doi/10.2760/5847},
urldate = {2023-10-05},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\JG8NMYYZ\europeancommission.jointresearchcentre._2023a.pdf}
}
@article{falco_2021,
title = {Governing {{AI}} Safety through Independent Audits},
author = {Falco, Gregory and Shneiderman, Ben and Badger, Julia and Carrier, Ryan and Dahbura, Anton and Danks, David and Eling, Martin and Goodloe, Alwyn and Gupta, Jerry and Hart, Christopher and Jirotka, Marina and Johnson, Henric and LaPointe, Cara and Llorens, Ashley J. and Mackworth, Alan K. and Maple, Carsten and Pálsson, Sigurður Emil and Pasquale, Frank and Winfield, Alan and Yeong, Zee Kin},
date = {2021-07},
journaltitle = {Nature Machine Intelligence},
shortjournal = {Nat Mach Intell},
volume = {3},
number = {7},
pages = {566--571},
publisher = {{Nature Publishing Group}},
issn = {2522-5839},
doi = {10.1038/s42256-021-00370-7},
url = {https://www.nature.com/articles/s42256-021-00370-7},
urldate = {2023-10-29},
abstract = {Highly automated systems are becoming omnipresent. They range in function from self-driving vehicles to advanced medical diagnostics and afford many benefits. However, there are assurance challenges that have become increasingly visible in high-profile crashes and incidents. Governance of such systems is critical to garner widespread public trust. Governance principles have been previously proposed offering aspirational guidance to automated system developers; however, their implementation is often impractical given the excessive costs and processes required to enact and then enforce the principles. This Perspective, authored by an international and multidisciplinary team across government organizations, industry and academia, proposes a mechanism to drive widespread assurance of highly automated systems: independent audit. As proposed, independent audit of AI systems would embody three ‘AAA’ governance principles of prospective risk Assessments, operation Audit trails and system Adherence to jurisdictional requirements. Independent audit of AI systems serves as a pragmatic approach to an otherwise burdensome and unenforceable assurance challenge.},
issue = {7},
langid = {english},
keywords = {assessment\_framework,Computer science,Ethics,Information systems and information technology,Science,Software,technology and society}
}
@online{ferry_2023,
title = {Probabilistic {{Dataset Reconstruction}} from {{Interpretable Models}}},
author = {Ferry, Julien and Aïvodji, Ulrich and Gambs, Sébastien and Huguet, Marie-José and Siala, Mohamed},
date = {2023-08-29},
eprint = {2308.15099},
eprinttype = {arxiv},
eprintclass = {cs, math},
url = {http://arxiv.org/abs/2308.15099},
urldate = {2023-08-30},
abstract = {Interpretability is often pointed out as a key requirement for trustworthy machine learning. However, learning and releasing models that are inherently interpretable leaks information regarding the underlying training data. As such disclosure may directly conflict with privacy, a precise quantification of the privacy impact of such breach is a fundamental problem. For instance, previous work have shown that the structure of a decision tree can be leveraged to build a probabilistic reconstruction of its training dataset, with the uncertainty of the reconstruction being a relevant metric for the information leak. In this paper, we propose of a novel framework generalizing these probabilistic reconstructions in the sense that it can handle other forms of interpretable models and more generic types of knowledge. In addition, we demonstrate that under realistic assumptions regarding the interpretable models' structure, the uncertainty of the reconstruction can be computed efficiently. Finally, we illustrate the applicability of our approach on both decision trees and rule lists, by comparing the theoretical information leak associated to either exact or heuristic learning algorithms. Our results suggest that optimal interpretable models are often more compact and leak less information regarding their training data than greedily-built ones, for a given accuracy level.},
pubstate = {preprint},
version = {1},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Information Theory},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\RB5EE8AL\\ferry_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\YZNRHHRP\\2308.html}
}
@online{floridi_2022,
type = {SSRN Scholarly Paper},
title = {{{capAI}} - {{A Procedure}} for {{Conducting Conformity Assessment}} of {{AI Systems}} in {{Line}} with the {{EU Artificial Intelligence Act}}},
author = {Floridi, Luciano and Holweg, Matthias and Taddeo, Mariarosaria and Amaya Silva, Javier and Mökander, Jakob and Wen, Yuni},
date = {2022-03-23},
number = {4064091},
location = {{Rochester, NY}},
doi = {10.2139/ssrn.4064091},
url = {https://papers.ssrn.com/abstract=4064091},
urldate = {2023-10-29},
abstract = {We have developed capAI, a conformity assessment procedure for AI systems, to provide an independent, comparable, quantifiable, and accountable assessment of AI systems that conforms with the proposed AIA regulation. By building on the AIA, capAI provides organisations with practical guidance on how high-level ethics principles can be translated into verifiable criteria that help shape the design, development, deployment and use of ethical AI. The main purpose of capAI is to serve as a governance tool that ensures and demonstrates that the development and operation of an AI system are trustworthy – i.e., legally compliant, ethically sound, and technically robust – and thus conform to the AIA.},
langid = {english},
pubstate = {preprint},
keywords = {AI,AI Act,AI Auditing,AI legislation,assessment\_framework,Conformity Assessment,EU,framework,Trustworthy AI},
file = {C:\Users\MarcoRepetto\Zotero\storage\985ZKSMH\floridi_2022.pdf}
}
@online{ghosh_2022,
title = {Algorithmic {{Fairness Verification}} with {{Graphical Models}}},
author = {Ghosh, Bishwamittra and Basu, Debabrota and Meel, Kuldeep S.},
date = {2022-06-01},
eprint = {2109.09447},
eprinttype = {arxiv},
eprintclass = {cs, stat},
url = {http://arxiv.org/abs/2109.09447},
urldate = {2023-10-20},
abstract = {In recent years, machine learning (ML) algorithms have been deployed in safety-critical and high-stake decision-making, where the fairness of algorithms is of paramount importance. Fairness in ML centers on detecting bias towards certain demographic populations induced by an ML classifier and proposes algorithmic solutions to mitigate the bias with respect to different fairness definitions. To this end, several fairness verifiers have been proposed that compute the bias in the prediction of an ML classifier--essentially beyond a finite dataset--given the probability distribution of input features. In the context of verifying linear classifiers, existing fairness verifiers are limited by accuracy due to imprecise modeling of correlations among features and scalability due to restrictive formulations of the classifiers as SSAT/SMT formulas or by sampling. In this paper, we propose an efficient fairness verifier, called FVGM, that encodes the correlations among features as a Bayesian network. In contrast to existing verifiers, FVGM proposes a stochastic subset-sum based approach for verifying linear classifiers. Experimentally, we show that FVGM leads to an accurate and scalable assessment for more diverse families of fairness-enhancing algorithms, fairness attacks, and group/causal fairness metrics than the state-of-the-art fairness verifiers. We also demonstrate that FVGM facilitates the computation of fairness influence functions as a stepping stone to detect the source of bias induced by subsets of features.},
pubstate = {preprint},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society,Computer Science - Machine Learning,Statistics - Applications},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\66U4LN97\\ghosh_2022.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\YI54HWZF\\2109.html}
}
@online{hacker_2023,
title = {{{AI Regulation}} in {{Europe}}: {{From}} the {{AI Act}} to {{Future Regulatory Challenges}}},
shorttitle = {{{AI Regulation}} in {{Europe}}},
author = {Hacker, Philipp},
date = {2023-10-06},
eprint = {2310.04072},
eprinttype = {arxiv},
doi = {10.48550/arXiv.2310.04072},
url = {http://arxiv.org/abs/2310.04072},
urldate = {2023-10-09},
abstract = {This chapter provides a comprehensive discussion on AI regulation in the European Union, contrasting it with the more sectoral and self-regulatory approach in the UK. It argues for a hybrid regulatory strategy that combines elements from both philosophies, emphasizing the need for agility and safe harbors to ease compliance. The paper examines the AI Act as a pioneering legislative effort to address the multifaceted challenges posed by AI, asserting that, while the Act is a step in the right direction, it has shortcomings that could hinder the advancement of AI technologies. The paper also anticipates upcoming regulatory challenges, such as the management of toxic content, environmental concerns, and hybrid threats. It advocates for immediate action to create protocols for regulated access to high-performance, potentially open-source AI systems. Although the AI Act is a significant legislative milestone, it needs additional refinement and global collaboration for the effective governance of rapidly evolving AI technologies.},
pubstate = {preprint},
version = {1},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\H5L25VKL\\hacker_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\8VZ4YI3K\\2310.html}
}
@inproceedings{hancox-li-2020-robus-machin,
title = {Robustness in Machine Learning Explanations: {{Does}} It Matter?},
booktitle = {Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency},
author = {Hancox-Li, Leif},
date = {2020},
series = {{{FAT}}* '20},
pages = {640--647},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/3351095.3372836},
url = {https://doi.org/10.1145/3351095.3372836},
abstract = {The explainable AI literature contains multiple notions of what an explanation is and what desiderata explanations should satisfy. One implicit source of disagreement is how far the explanations should reflect real patterns in the data or the world. This disagreement underlies debates about other desiderata, such as how robust explanations are to slight perturbations in the input data. I argue that robustness is desirable to the extent that we're concerned about finding real patterns in the world. The import of real patterns differs according to the problem context. In some contexts, non-robust explanations can constitute a moral hazard. By being clear about the extent to which we care about capturing real patterns, we can also determine whether the Rashomon Effect is a boon or a bane.},
isbn = {978-1-4503-6936-7},
pagetotal = {8},
keywords = {artificial intelligence,epistemology,ethics,explanation,machine learning,methodology,objectivity,philosophy,robustness},
timestamp = {2023-06-15 11:46:58 (CEST)},
file = {C:\Users\MarcoRepetto\Zotero\storage\IC4G6VKU\hancox-li-2020-robus-machin.pdf}
}
@article{hunkenschroer_2021,
title = {How to {{Improve Fairness Perceptions}} of {{AI}} in {{Hiring}}: {{The Crucial Role}} of {{Positioning}} and {{Sensitization}}},
shorttitle = {How to {{Improve Fairness Perceptions}} of {{AI}} in {{Hiring}}},
author = {Hunkenschroer, Anna},
date = {2021-07-16},
journaltitle = {AI Ethics Journal},
shortjournal = {AIEJ},
volume = {2},
number = {2},
issn = {26901633, 26901625},
doi = {10.47289/AIEJ20210716-3},
url = {https://www.aiethicsjournal.org/10-47289-aiej20210716-3},
urldate = {2023-09-12},
abstract = {Companies increasingly deploy artificial intelligence (AI) technologies in their personnel recruiting and selection processes to streamline them, thus making them more efficient, consistent, and less human biased. However, prior research found that applicants prefer face-to-face interviews compared with AI interviews, perceiving them as less fair. Additionally, emerging evidence exists that contextual influences, such as the type of task for which AI is used, or applicants’ individual differences, may influence applicants’ reactions to AI-powered selection. The purpose of our study was to investigate whether adjusting process design factors may help to improve people's fairness perceptions of AI interviews. The results of our 2 x 2 x 2 online study (N = 404) showed that the positioning of the AI interview in the overall selection process, as well as participants’ sensitization to its potential to reduce human bias in the selection process have a significant effect on people’s perceptions of fairness. Additionally, these two process design factors had an indirect effect on overall organizational attractiveness mediated through applicants’ fairness perceptions. The findings may help organizations to optimize their deployment of AI in selection processes to improve people’s perceptions of fairness and thus attract top talent.},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\EWFP7DSF\hunkenschroer_2021.pdf}
}
@online{hupont_2023,
title = {Use Case Cards: A Use Case Reporting Framework Inspired by the {{European AI Act}}},
shorttitle = {Use Case Cards},
author = {Hupont, Isabelle and Fernández-Llorca, David and Baldassarri, Sandra and Gómez, Emilia},
date = {2023-06-23},
eprint = {2306.13701},
eprinttype = {arxiv},
eprintclass = {cs},
doi = {10.48550/arXiv.2306.13701},
url = {http://arxiv.org/abs/2306.13701},
urldate = {2023-07-18},
abstract = {Despite recent efforts by the Artificial Intelligence (AI) community to move towards standardised procedures for documenting models, methods, systems or datasets, there is currently no methodology focused on use cases aligned with the risk-based approach of the European AI Act (AI Act). In this paper, we propose a new framework for the documentation of use cases, that we call "use case cards", based on the use case modelling included in the Unified Markup Language (UML) standard. Unlike other documentation methodologies, we focus on the intended purpose and operational use of an AI system. It consists of two main parts. Firstly, a UML-based template, tailored to allow implicitly assessing the risk level of the AI system and defining relevant requirements. Secondly, a supporting UML diagram designed to provide information about the system-user interactions and relationships. The proposed framework is the result of a co-design process involving a relevant team of EU policy experts and scientists. We have validated our proposal with 11 experts with different backgrounds and a reasonable knowledge of the AI Act as a prerequisite. We provide the 5 "use case cards" used in the co-design and validation process. "Use case cards" allows framing and contextualising use cases in an effective way, and we hope this methodology can be a useful tool for policy makers and providers for documenting use cases, assessing the risk level, adapting the different requirements and building a catalogue of existing usages of AI.},
pubstate = {preprint},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\UHTEUBJP\\hupont_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\3FSFE8PR\\2306.html}
}
@article{ii_,
title = {Humans in the {{Loop}}},
author = {Price, II, Nicholson and Crootof, Rebecca and Kaminski, Margot},
journaltitle = {Vanderbilt Law Review},
volume = {76},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\FFFSHDFD\ii_.pdf}
}
@article{kinney_1972,
title = {The {{Auditor}}'s {{Sampling Objectives}}: {{Four}} or {{Two}}?},
shorttitle = {The {{Auditor}}'s {{Sampling Objectives}}},
author = {Kinney, William R.},
date = {1972},
journaltitle = {Journal of Accounting Research},
volume = {10},
number = {2},
eprint = {2490019},
eprinttype = {jstor},
pages = {407--412},
publisher = {{[Accounting Research Center, Booth School of Business, University of Chicago, Wiley]}},
issn = {0021-8456},
doi = {10.2307/2490019},
url = {https://www.jstor.org/stable/2490019},
urldate = {2023-11-05}
}
@article{mariani_2023,
title = {Trustworthy {{AI}}—{{Part}} 1},
author = {Mariani, Riccardo and Rossi, Francesca and Cucchiara, Rita and Pavone, Marco and Simkin, Barnaby and Koene, Ansgar and Papenbrock, Jochen},
date = {2023-02},
journaltitle = {Computer},
shortjournal = {Computer},
volume = {56},
number = {2},
pages = {14--18},
issn = {0018-9162, 1558-0814},
doi = {10.1109/MC.2022.3227683},
url = {https://ieeexplore.ieee.org/document/10042078/},
urldate = {2023-10-04},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\DVXXPMFR\mariani_2023.pdf}
}
@article{mitchell-2019-model-cards,
title = {Model Cards for Model Reporting},
author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
date = {2019-01},
journaltitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
publisher = {{ACM}},
doi = {10.1145/3287560.3287596},
url = {http://dx.doi.org/10.1145/3287560.3287596},
keywords = {replicability},
timestamp = {2023-05-03 15:57:41 (CEST)},
file = {C:\Users\MarcoRepetto\Zotero\storage\HA56BUHT\mitchell-2019-model-cards.pdf}
}
@online{nessler_2023,
title = {Functional Trustworthiness of {{AI}} Systems by Statistically Valid Testing},
author = {Nessler, Bernhard and Doms, Thomas and Hochreiter, Sepp},
date = {2023-10-04},
eprint = {2310.02727},
eprinttype = {arxiv},
eprintclass = {cs, stat},
url = {http://arxiv.org/abs/2310.02727},
urldate = {2023-10-05},
abstract = {The authors are concerned about the safety, health, and rights of the European citizens due to inadequate measures and procedures required by the current draft of the EU Artificial Intelligence (AI) Act for the conformity assessment of AI systems. We observe that not only the current draft of the EU AI Act, but also the accompanying standardization efforts in CEN/CENELEC, have resorted to the position that real functional guarantees of AI systems supposedly would be unrealistic and too complex anyways. Yet enacting a conformity assessment procedure that creates the false illusion of trust in insufficiently assessed AI systems is at best naive and at worst grossly negligent. The EU AI Act thus misses the point of ensuring quality by functional trustworthiness and correctly attributing responsibilities. The trustworthiness of an AI decision system lies first and foremost in the correct statistical testing on randomly selected samples and in the precision of the definition of the application domain, which enables drawing samples in the first place. We will subsequently call this testable quality functional trustworthiness. It includes a design, development, and deployment that enables correct statistical testing of all relevant functions. We are firmly convinced and advocate that a reliable assessment of the statistical functional properties of an AI system has to be the indispensable, mandatory nucleus of the conformity assessment. In this paper, we describe the three necessary elements to establish a reliable functional trustworthiness, i.e., (1) the definition of the technical distribution of the application, (2) the risk-based minimum performance requirements, and (3) the statistically valid testing based on independent random samples.},
pubstate = {preprint},
version = {1},
keywords = {assessment\_framework,Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning},
file = {C:\Users\MarcoRepetto\Zotero\storage\LTW5TCLH\nessler_2023.pdf}
}
@online{nguyen_2023,
title = {Fix {{Fairness}}, {{Don}}'t {{Ruin Accuracy}}: {{Performance Aware Fairness Repair}} Using {{AutoML}}},
shorttitle = {Fix {{Fairness}}, {{Don}}'t {{Ruin Accuracy}}},
author = {Nguyen, Giang and Biswas, Sumon and Rajan, Hridesh},
date = {2023-08-28},
eprint = {2306.09297},
eprinttype = {arxiv},
eprintclass = {cs},
doi = {10.1145/3611643.3616257},
url = {http://arxiv.org/abs/2306.09297},
urldate = {2023-10-05},
abstract = {Machine learning (ML) is increasingly being used in critical decision-making software, but incidents have raised questions about the fairness of ML predictions. To address this issue, new tools and methods are needed to mitigate bias in ML-based software. Previous studies have proposed bias mitigation algorithms that only work in specific situations and often result in a loss of accuracy. Our proposed solution is a novel approach that utilizes automated machine learning (AutoML) techniques to mitigate bias. Our approach includes two key innovations: a novel optimization function and a fairness-aware search space. By improving the default optimization function of AutoML and incorporating fairness objectives, we are able to mitigate bias with little to no loss of accuracy. Additionally, we propose a fairness-aware search space pruning method for AutoML to reduce computational cost and repair time. Our approach, built on the state-of-the-art Auto-Sklearn tool, is designed to reduce bias in real-world scenarios. In order to demonstrate the effectiveness of our approach, we evaluated our approach on four fairness problems and 16 different ML models, and our results show a significant improvement over the baseline and existing bias mitigation techniques. Our approach, Fair-AutoML, successfully repaired 60 out of 64 buggy cases, while existing bias mitigation techniques only repaired up to 44 out of 64 cases.},
pubstate = {preprint},
keywords = {Computer Science - Machine Learning,Computer Science - Software Engineering},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\K47NHK34\\nguyen_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\IVHEE6HE\\2306.html}
}
@online{nitsure_2023,
title = {Risk {{Assessment}} and {{Statistical Significance}} in the {{Age}} of {{Foundation Models}}},
author = {Nitsure, Apoorva and Mroueh, Youssef and Rigotti, Mattia and Greenewald, Kristjan and Belgodere, Brian and Yurochkin, Mikhail and Navratil, Jiri and Melnyk, Igor and Ross, Jerret},
date = {2023-10-10},
eprint = {2310.07132},
eprinttype = {arxiv},
eprintclass = {cs, math, q-fin, stat},
doi = {10.48550/arXiv.2310.07132},
url = {http://arxiv.org/abs/2310.07132},
urldate = {2023-10-12},
abstract = {We propose a distributional framework for assessing socio-technical risks of foundation models with quantified statistical significance. Our approach hinges on a new statistical relative testing based on first and second order stochastic dominance of real random variables. We show that the second order statistics in this test are linked to mean-risk models commonly used in econometrics and mathematical finance to balance risk and utility when choosing between alternatives. Using this framework, we formally develop a risk-aware approach for foundation model selection given guardrails quantified by specified metrics. Inspired by portfolio optimization and selection theory in mathematical finance, we define a metrics portfolio for each model as a means to aggregate a collection of metrics, and perform model selection based on the stochastic dominance of these portfolios. The statistical significance of our tests is backed theoretically by an asymptotic analysis via central limit theorems instantiated in practice via a bootstrap variance estimate. We use our framework to compare various large language models regarding risks related to drifting from instructions and outputting toxic content.},
pubstate = {preprint},
version = {1},
keywords = {Computer Science - Machine Learning,Mathematics - Statistics Theory,Quantitative Finance - Risk Management,Statistics - Machine Learning},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\Z2QF3AL9\\nitsure_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\B834N36G\\2310.html}
}
@article{poretschkin_,
title = {{{AI Assessment Catalog}}},
author = {Poretschkin, Maximilian and Schmitz, Anna and Akila, Maram and Adilova, Linara and Becker, Daniel and Cremers, Armin B. and Hecker, Dirk and Houben, Sebastian and Rosenzweig, Julia and Sicking, Joachim and Schulz, Elena and Voss, Angelika and Wrobel, Stefan},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\I9ZZPV8R\Poretschkin et al. - AI Assessment Catalog.pdf}
}
@online{rudin_2021a,
title = {Interpretable {{Machine Learning}}: {{Fundamental Principles}} and 10 {{Grand Challenges}}},
shorttitle = {Interpretable {{Machine Learning}}},
author = {Rudin, Cynthia and Chen, Chaofan and Chen, Zhi and Huang, Haiyang and Semenova, Lesia and Zhong, Chudi},
date = {2021-07-09},
eprint = {2103.11251},
eprinttype = {arxiv},
eprintclass = {cs, stat},
url = {http://arxiv.org/abs/2103.11251},
urldate = {2023-10-22},
abstract = {Interpretability in machine learning (ML) is crucial for high stakes decisions and troubleshooting. In this work, we provide fundamental principles for interpretable ML, and dispel common misunderstandings that dilute the importance of this crucial topic. We also identify 10 technical challenge areas in interpretable machine learning and provide history and background on each problem. Some of these problems are classically important, and some are recent problems that have arisen in the last few years. These problems are: (1) Optimizing sparse logical models such as decision trees; (2) Optimization of scoring systems; (3) Placing constraints into generalized additive models to encourage sparsity and better interpretability; (4) Modern case-based reasoning, including neural networks and matching for causal inference; (5) Complete supervised disentanglement of neural networks; (6) Complete or even partial unsupervised disentanglement of neural networks; (7) Dimensionality reduction for data visualization; (8) Machine learning models that can incorporate physics and other generative or causal constraints; (9) Characterization of the "Rashomon set" of good models; and (10) Interpretable reinforcement learning. This survey is suitable as a starting point for statisticians and computer scientists interested in working in interpretable machine learning.},
pubstate = {preprint},
keywords = {68T01,Computer Science - Machine Learning,I.2.6,Statistics - Machine Learning},
file = {C:\Users\MarcoRepetto\Zotero\storage\7KQ7498P\2103.html}
}
@incollection{saaty_1988,
title = {What Is the {{Analytic Hierarchy Process}}?},
booktitle = {Mathematical {{Models}} for {{Decision Support}}},
author = {Saaty, Thomas L.},
editor = {Mitra, Gautam and Greenberg, Harvey J. and Lootsma, Freerk A. and Rijkaert, Marcel J. and Zimmermann, Hans J.},
date = {1988},
pages = {109--121},
publisher = {{Springer Berlin Heidelberg}},
location = {{Berlin, Heidelberg}},
doi = {10.1007/978-3-642-83555-1_5},
url = {http://link.springer.com/10.1007/978-3-642-83555-1_5},
urldate = {2023-11-05},
isbn = {978-3-642-83557-5 978-3-642-83555-1},
langid = {english}
}
@online{schmidt_2019a,
title = {Quantifying {{Interpretability}} and {{Trust}} in {{Machine Learning Systems}}},
author = {Schmidt, Philipp and Biessmann, Felix},
date = {2019-01-20},
eprint = {1901.08558},
eprinttype = {arxiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.1901.08558},
url = {http://arxiv.org/abs/1901.08558},
urldate = {2023-10-22},
abstract = {Decisions by Machine Learning (ML) models have become ubiquitous. Trusting these decisions requires understanding how algorithms take them. Hence interpretability methods for ML are an active focus of research. A central problem in this context is that both the quality of interpretability methods as well as trust in ML predictions are difficult to measure. Yet evaluations, comparisons and improvements of trust and interpretability require quantifiable measures. Here we propose a quantitative measure for the quality of interpretability methods. Based on that we derive a quantitative measure of trust in ML decisions. Building on previous work we propose to measure intuitive understanding of algorithmic decisions using the information transfer rate at which humans replicate ML model predictions. We provide empirical evidence from crowdsourcing experiments that the proposed metric robustly differentiates interpretability methods. The proposed metric also demonstrates the value of interpretability for ML assisted human decision making: in our experiments providing explanations more than doubled productivity in annotation tasks. However unbiased human judgement is critical for doctors, judges, policy makers and others. Here we derive a trust metric that identifies when human decisions are overly biased towards ML predictions. Our results complement existing qualitative work on trust and interpretability by quantifiable measures that can serve as objectives for further improving methods in this field of research.},
pubstate = {preprint},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\DWMXZ43J\\schmidt_2019a.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\G54ZPZPZ\\1901.html}
}
@online{sun_2023,
title = {Right for the {{Wrong Reason}}: {{Can Interpretable ML Techniques Detect Spurious Correlations}}?},
shorttitle = {Right for the {{Wrong Reason}}},
author = {Sun, Susu and Koch, Lisa M. and Baumgartner, Christian F.},
date = {2023-08-08},
eprint = {2307.12344},
eprinttype = {arxiv},
eprintclass = {cs},
url = {http://arxiv.org/abs/2307.12344},
urldate = {2023-08-09},
abstract = {While deep neural network models offer unmatched classification performance, they are prone to learning spurious correlations in the data. Such dependencies on confounding information can be difficult to detect using performance metrics if the test data comes from the same distribution as the training data. Interpretable ML methods such as post-hoc explanations or inherently interpretable classifiers promise to identify faulty model reasoning. However, there is mixed evidence whether many of these techniques are actually able to do so. In this paper, we propose a rigorous evaluation strategy to assess an explanation technique's ability to correctly identify spurious correlations. Using this strategy, we evaluate five post-hoc explanation techniques and one inherently interpretable method for their ability to detect three types of artificially added confounders in a chest x-ray diagnosis task. We find that the post-hoc technique SHAP, as well as the inherently interpretable Attri-Net provide the best performance and can be used to reliably identify faulty model behavior.},
pubstate = {preprint},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\9D4FXUVR\\sun_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\FFPF6J8S\\2307.html}
}
@article{tayi_1985,
title = {Integration of {{Auditor Preferences}} and {{Sampling Objectives}}: {{A Polynomial Goal-Programming Perspective}}},
shorttitle = {Integration of {{Auditor Preferences}} and {{Sampling Objectives}}},
author = {Tayi, Giri Kumar and Gangolly, Jagdish},
date = {1985-10-01},
journaltitle = {Journal of the Operational Research Society},
shortjournal = {J Oper Res Soc},
volume = {36},
number = {10},
pages = {951--957},
issn = {1476-9360},
doi = {10.1057/jors.1985.166},
url = {https://doi.org/10.1057/jors.1985.166},
urldate = {2023-10-30},
abstract = {The choice of an audit sample involves integration of diverse goals. Such integration must consider the preferences of the auditor towards costs as well as risks. Traditionally this problem has been formulated to determine minimal sample size and its allocation such that each of the goals is satisfied simultaneously or to the extent possible. However, these formulations do not consider trade-offs between goals, so that the sample may not reflect the auditor's preferences regarding sampling costs and risks. In this paper, we model such trade-offs by utilizing a polynomial goal-programming framework and utilize a practical case problem to illustrate the framework. Computational experience shows the technique to be feasible and inexpensive.},
langid = {english},
keywords = {accounting,auditing,mathematical programming}
}
@article{vakkuri_2021,
title = {{{ECCOLA}} — {{A}} Method for Implementing Ethically Aligned {{AI}} Systems},
author = {Vakkuri, Ville and Kemell, Kai-Kristian and Jantunen, Marianna and Halme, Erika and Abrahamsson, Pekka},
date = {2021-12-01},
journaltitle = {Journal of Systems and Software},
shortjournal = {Journal of Systems and Software},
volume = {182},
pages = {111067},
issn = {0164-1212},
doi = {10.1016/j.jss.2021.111067},
url = {https://www.sciencedirect.com/science/article/pii/S0164121221001643},
urldate = {2023-10-29},
abstract = {Artificial Intelligence (AI) systems are becoming increasingly widespread and exert a growing influence on society at large. The growing impact of these systems has also highlighted potential issues that may arise from their utilization, such as data privacy issues, resulting in calls for ethical AI systems. Yet, how to develop ethical AI systems remains an important question in the area. How should the principles and values be converted into requirements for these systems, and what should developers and the organizations developing these systems do? To further bridge this gap in the area, in this paper, we present a method for implementing AI ethics: ECCOLA. Following a cyclical action research approach, ECCOLA has been iteratively developed over the course of multiple years, in collaboration with both researchers and practitioners.},
keywords = {AI ethics,Artificial intelligence,assessment\_framework,Ethics,Implementing,Method},
file = {C:\Users\MarcoRepetto\Zotero\storage\88MVMXRC\vakkuri_2021.pdf}
}
@article{vetter_2023,
title = {Lessons {{Learned}} from {{Assessing Trustworthy AI}} in {{Practice}}},
author = {Vetter, Dennis and Amann, Julia and Bruneault, Frédérick and Coffee, Megan and Düdder, Boris and Gallucci, Alessio and Gilbert, Thomas Krendl and Hagendorff, Thilo and Van Halem, Irmhild and Hickman, Eleanore and Hildt, Elisabeth and Holm, Sune and Kararigas, Georgios and Kringen, Pedro and Madai, Vince I. and Wiinblad Mathez, Emilie and Tithi, Jesmin Jahan and Westerlund, Magnus and Wurth, Renee and Zicari, Roberto V. and {Z-Inspection® initiative (2022)}},
date = {2023-12},
journaltitle = {Digital Society},
shortjournal = {DISO},
volume = {2},
number = {3},
pages = {35},
issn = {2731-4650, 2731-4669},
doi = {10.1007/s44206-023-00063-1},
url = {https://link.springer.com/10.1007/s44206-023-00063-1},
urldate = {2023-10-15},
abstract = {Abstract Building artificial intelligence (AI) systems that adhere to ethical standards is a complex problem. Even though a multitude of guidelines for the design and development of such trustworthy AI systems exist, these guidelines focus on high-level and abstract requirements for AI systems, and it is often very difficult to assess if a specific system fulfills these requirements. The Z-Inspection® process provides a holistic and dynamic framework to evaluate the trustworthiness of specific AI systems at different stages of the AI lifecycle, including intended use, design, and development. It focuses, in particular, on the discussion and identification of ethical issues and tensions through the analysis of socio-technical scenarios and a requirement-based framework for ethical and trustworthy AI. This article is a methodological reflection on the Z-Inspection® process. We illustrate how high-level guidelines for ethical and trustworthy AI can be applied in practice and provide insights for both AI researchers and AI practitioners. We share the lessons learned from conducting a series of independent assessments to evaluate the trustworthiness of real-world AI systems, as well as key recommendations and practical suggestions on how to ensure a rigorous trustworthiness assessment throughout the lifecycle of an AI system. The results presented in this article are based on our assessments of AI systems in the healthcare sector and environmental monitoring, where we used the framework for trustworthy AI proposed in the Ethics Guidelines for Trustworthy AI by the European Commission’s High-Level Expert Group on AI. However, the assessment process and the lessons learned can be adapted to other domains and include additional frameworks.},
langid = {english},
keywords = {assessment\_framework},
file = {C:\Users\MarcoRepetto\Zotero\storage\7CBHS7A2\vetter_2023.pdf}
}
@online{walters_2023a,
title = {Complying with the {{EU AI Act}}},
author = {Walters, Jacintha and Dey, Diptish and Bhaumik, Debarati and Horsman, Sophie},
date = {2023-07-19},
eprint = {2307.10458},
eprinttype = {arxiv},
eprintclass = {cs},
doi = {10.48550/arXiv.2307.10458},
url = {http://arxiv.org/abs/2307.10458},
urldate = {2023-10-05},
abstract = {The EU AI Act is the proposed EU legislation concerning AI systems. This paper identifies several categories of the AI Act. Based on this categorization, a questionnaire is developed that serves as a tool to offer insights by creating quantitative data. Analysis of the data shows various challenges for organizations in different compliance categories. The influence of organization characteristics, such as size and sector, is examined to determine the impact on compliance. The paper will also share qualitative data on which questions were prevalent among respondents, both on the content of the AI Act as the application. The paper concludes by stating that there is still room for improvement in terms of compliance with the AIA and refers to a related project that examines a solution to help these organizations.},
pubstate = {preprint},
keywords = {Computer Science - Artificial Intelligence,I.2},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\R8KSZ6BL\\walters_2023a.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\BV9PH2LD\\2307.html}
}
@article{whittlestone_2019,
title = {Ethical and Societal Implications of Algorithms, Data, and Artificial Intelligence: A Roadmap for Research},
author = {Whittlestone, Jess and Nyrup, Rune and Alexandrova, Anna and Dihal, Kanta and Cave, Stephen},
date = {2019},
journaltitle = {London: Nuffield Foundation},
langid = {english},
file = {C:\Users\MarcoRepetto\Zotero\storage\VU65VIMV\Whittlestone et al. - Ethical and societal implications of algorithms, d.pdf}
}
@online{winter_2021,
title = {Trusted {{Artificial Intelligence}}: {{Towards Certification}} of {{Machine Learning Applications}}},
shorttitle = {Trusted {{Artificial Intelligence}}},
author = {Winter, Philip Matthias and Eder, Sebastian and Weissenböck, Johannes and Schwald, Christoph and Doms, Thomas and Vogt, Tom and Hochreiter, Sepp and Nessler, Bernhard},
date = {2021-03-31},
eprint = {2103.16910},
eprinttype = {arxiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.2103.16910},
url = {http://arxiv.org/abs/2103.16910},
urldate = {2023-10-05},
abstract = {Artificial Intelligence is one of the fastest growing technologies of the 21st century and accompanies us in our daily lives when interacting with technical applications. However, reliance on such technical systems is crucial for their widespread applicability and acceptance. The societal tools to express reliance are usually formalized by lawful regulations, i.e., standards, norms, accreditations, and certificates. Therefore, the TÜV AUSTRIA Group, in cooperation with the Institute for Machine Learning at the Johannes Kepler University Linz, proposes a certification process and an audit catalog for Machine Learning applications. We are convinced that our approach can serve as the foundation for the certification of applications that use Machine Learning and Deep Learning, the techniques that drive the current revolution in Artificial Intelligence. While certain high-risk areas, such as fully autonomous robots in workspaces shared with humans, are still some time away from certification, we aim to cover low-risk applications with our certification procedure. Our holistic approach attempts to analyze Machine Learning applications from multiple perspectives to evaluate and verify the aspects of secure software development, functional requirements, data quality, data protection, and ethics. Inspired by existing work, we introduce four criticality levels to map the criticality of a Machine Learning application regarding the impact of its decisions on people, environment, and organizations. Currently, the audit catalog can be applied to low-risk applications within the scope of supervised learning as commonly encountered in industry. Guided by field experience, scientific developments, and market demands, the audit catalog will be extended and modified accordingly.},
pubstate = {preprint},
keywords = {assessment\_framework,Computer Science - Computers and Society,Computer Science - Machine Learning,Computer Science - Software Engineering,Statistics - Machine Learning},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\T89UWFWR\\winter_2021.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\R2J747UQ\\2103.html}
}
@inproceedings{zhou_2018,
title = {Measuring {{Interpretability}} for {{Different Types}} of {{Machine Learning Models}}},
booktitle = {Trends and {{Applications}} in {{Knowledge Discovery}} and {{Data Mining}}},
author = {Zhou, Qing and Liao, Fenglu and Mou, Chao and Wang, Ping},
editor = {Ganji, Mohadeseh and Rashidi, Lida and Fung, Benjamin C. M. and Wang, Can},
date = {2018},
series = {Lecture {{Notes}} in {{Computer Science}}},
pages = {295--308},
publisher = {{Springer International Publishing}},
location = {{Cham}},
doi = {10.1007/978-3-030-04503-6_29},
abstract = {The interpretability of a machine learning model plays a significant role in practical applications, thus it is necessary to develop a method to compare the interpretability for different models so as to select the most appropriate one. However, model interpretability, a highly subjective concept, is difficult to measure accurately, not to mention the interpretability comparison of different models. To this end, we develop an interpretability evaluation model to compute model interpretability and compare interpretability for different models. Specifically, first we present a general form of model interpretability. Second, a questionnaire survey system is developed to collect information about users’ understanding of a machine learning model. Next, three structural features are selected to investigate the relationship between interpretability and structural complexity. After this, an interpretability label is built based on the questionnaire survey result and a linear regression model is developed to evaluate the relationship between the structural features and model interpretability. The experiment results demonstrate that our interpretability evaluation model is valid and reliable for evaluating the interpretability of different models.},
isbn = {978-3-030-04503-6},
langid = {english},
keywords = {Interpretability evaluation model,Machine learning models,Model interpretability,Structural complexity},
file = {C:\Users\MarcoRepetto\Zotero\storage\GDILN5HN\zhou_2018.pdf}
}
@online{zhou_2023,
title = {The {{Solvability}} of {{Interpretability Evaluation Metrics}}},
author = {Zhou, Yilun and Shah, Julie},
date = {2023-02-02},
eprint = {2205.08696},
eprinttype = {arxiv},
eprintclass = {cs},
url = {http://arxiv.org/abs/2205.08696},
urldate = {2023-10-21},
abstract = {Feature attribution methods are popular for explaining neural network predictions, and they are often evaluated on metrics such as comprehensiveness and sufficiency. In this paper, we highlight an intriguing property of these metrics: their solvability. Concretely, we can define the problem of optimizing an explanation for a metric, which can be solved by beam search. This observation leads to the obvious yet unaddressed question: why do we use explainers (e.g., LIME) not based on solving the target metric, if the metric value represents explanation quality? We present a series of investigations showing strong performance of this beam search explainer and discuss its broader implication: a definition-evaluation duality of interpretability concepts. We implement the explainer and release the Python solvex package for models of text, image and tabular domains.},
pubstate = {preprint},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\D97WZARN\\zhou_2023.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\BHJIRQDV\\2205.html}
}
@article{zicari_2021,
title = {Z-{{Inspection}}®: {{A Process}} to {{Assess Trustworthy AI}}},
shorttitle = {Z-{{Inspection}}®},
author = {Zicari, Roberto V. and Brodersen, John and Brusseau, James and Düdder, Boris and Eichhorn, Timo and Ivanov, Todor and Kararigas, Georgios and Kringen, Pedro and McCullough, Melissa and Möslein, Florian and Mushtaq, Naveed and Roig, Gemma and Stürtz, Norman and Tolle, Karsten and Tithi, Jesmin Jahan and family=Halem, given=Irmhild, prefix=van, useprefix=true and Westerlund, Magnus},
date = {2021-06},
journaltitle = {IEEE Transactions on Technology and Society},
volume = {2},
number = {2},
pages = {83--97},
issn = {2637-6415},
doi = {10.1109/TTS.2021.3066209},
url = {https://ieeexplore.ieee.org/document/9380498},
urldate = {2023-10-29},
abstract = {The ethical and societal implications of artificial intelligence systems raise concerns. In this article, we outline a novel process based on applied ethics, namely, Z-Inspection®, to assess if an AI system is trustworthy. We use the definition of trustworthy AI given by the high-level European Commission's expert group on AI. Z-Inspection® is a general inspection process that can be applied to a variety of domains where AI systems are used, such as business, healthcare, and public sector, among many others. To the best of our knowledge, Z-Inspection® is the first process to assess trustworthy AI in practice.},
eventtitle = {{{IEEE Transactions}} on {{Technology}} and {{Society}}},
keywords = {assessment\_framework},
file = {C\:\\Users\\MarcoRepetto\\Zotero\\storage\\NBWKWH35\\zicari_2021.pdf;C\:\\Users\\MarcoRepetto\\Zotero\\storage\\W8AVP65V\\9380498.html}
}