{"$schema": "https://c3voc.de/schedule/schema.json", "generator": {"name": "pretalx", "version": "2026.1.1"}, "schedule": {"url": "https://cfp.pydata.org/london2025/schedule/", "version": "0.34", "base_url": "https://cfp.pydata.org", "conference": {"acronym": "london2025", "title": "PyData London 2025", "start": "2025-06-06", "end": "2025-06-08", "daysCount": 3, "timeslot_duration": "00:05", "time_zone_name": "Europe/London", "colors": {"primary": "#4C9CB4"}, "rooms": [{"name": "Grand Hall", "slug": "4554-grand-hall", "guid": "d4d54fb8-0f29-585a-a13a-61f0ffdd8d73", "description": null, "capacity": null}, {"name": "Doddington Forum", "slug": "4555-doddington-forum", "guid": "ecf80c32-e548-5858-a372-2b8a2e31f1a3", "description": null, "capacity": null}, {"name": "Hardwick Hub", "slug": "4556-hardwick-hub", "guid": "847bf967-3181-5510-bf4e-b74c5874664d", "description": null, "capacity": null}, {"name": "Library", "slug": "4557-library", "guid": "cec9235a-122a-50f8-ad9c-917059c0ff46", "description": "Library is on the ground floor", "capacity": null}, {"name": "Elizabeth Board Room", "slug": "4559-elizabeth-board-room", "guid": "ba43ad98-f370-5137-86e9-13dc6e5e8750", "description": "Located on Floor 1", "capacity": null}], "tracks": [{"name": "Quant Finance Track Sponsored by Man Group", "slug": "6088-quant-finance-track-sponsored-by-man-group", "color": "#165CAA"}], "days": [{"index": 1, "date": "2025-06-06", "day_start": "2025-06-06T04:00:00+01:00", "day_end": "2025-06-07T03:59:00+01:00", "rooms": {"Grand Hall": [{"guid": "dae0ff28-08d2-5125-8aa1-60094f2cc746", "code": "HFWMHG", "id": 77392, "logo": null, "date": "2025-06-06T09:00:00+01:00", "start": "09:00", "duration": "03:30", "room": "Grand Hall", "slug": "london2025-77392-gpu-accelerated-python", "url": "https://cfp.pydata.org/london2025/talk/HFWMHG/", "title": "GPU Accelerated Python", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Accelerating Python using the GPU is much 
easier than you might think. We will explore the powerful CUDA-enabled Python ecosystem in this tutorial through hands-on examples using some of the most popular accelerated scientific computing libraries.\n\nTopics include:\n- Introduction to General Purpose GPU Computing\n- GPU vs CPU - Which processor is best for which tasks\n- Introduction to CUDA\n- How to use CUDA with Python\n- Using Numba to write kernel functions\n- CuPy\n- cuDF\n\nNo prior experience with GPU's is necessary, but attendees should be familiar with Python.", "description": "", "recording_license": "", "do_not_record": false, "persons": [{"code": "EE7H7J", "name": "Jacob Tomlinson", "avatar": "https://cfp.pydata.org/media/avatars/EE7H7J_KtXk3nN.webp", "biography": "Jacob Tomlinson is a senior Python software engineer at [NVIDIA](https://www.nvidia.com/) with a focus on deployment tooling for distributed systems. His work involves maintaining open source projects including [RAPIDS](https://rapids.ai/) and [Dask](https://dask.org/). RAPIDS is a suite of GPU accelerated open source Python tools which mimic APIs from the PyData stack including those of Numpy, Pandas and SciKit-Learn. Dask provides advanced parallelism for analytics with out-of-core computation, lazy evaluation and distributed execution of the PyData stack. He also tinkers with the open source Kubernetes Python framework [`kr8s`](https://docs.kr8s.org/en/stable/) in his spare time. Jacob volunteers with the local tech community group [Tech Exeter](https://techexeter.uk/) and lives in Exeter, UK.", "public_name": "Jacob Tomlinson", "guid": "7d5794a8-e43e-58a6-9a19-8751d101fde1", "url": "https://cfp.pydata.org/london2025/speaker/EE7H7J/"}, {"code": "Z9ENP8", "name": "Katrina Riehl", "avatar": "https://cfp.pydata.org/media/avatars/Z9ENP8_ml6Zw2v.webp", "biography": "Dr. Katrina Riehl is a Principal Technical Product Manager at NVIDIA supporting CUDA and Python. 
For over two decades, Katrina has worked extensively in the fields of scientific computing, machine learning, data science, and visualization. Most notably, she has helped lead initiatives at the University of Texas Austin Applied Research Laboratory, Anaconda, Apple, Expedia Group, Cloudflare, and Snowflake. She is an active volunteer in the Python open-source scientific software community and currently serves on the Advisory Council for NumFOCUS.", "public_name": "Katrina Riehl", "guid": "885c34b1-3992-5e82-988f-01bce678c58b", "url": "https://cfp.pydata.org/london2025/speaker/Z9ENP8/"}, {"code": "VTVTRN", "name": "Jeremy Tanner", "avatar": "https://cfp.pydata.org/media/avatars/VTVTRN_rci9izw.webp", "biography": "Jeremy talks to people who talk to computers about talking to computers.", "public_name": "Jeremy Tanner", "guid": "fc0fde34-d613-5ee9-864a-a9d64e0b5938", "url": "https://cfp.pydata.org/london2025/speaker/VTVTRN/"}, {"code": "M9CR7H", "name": "Lawrence Mitchell", "avatar": null, "biography": "Lawrence Mitchell works and thinks as part of the RAPIDS team at NVIDIA. His focus is on high-productivity, high-performance libraries for data analytics. He leads the technical design and implementation of the RAPIDS-accelerated Polars GPU engine. Prior to joining NVIDIA he was a lecturer in Computer Science and Applied Mathematics at the University of Durham with research interests in high performance simulation of continuum mechanics, structure-preserving numerical methods, and preconditioning techniques for coupled multiphysics problems. 
He was a founding co-lead and technical architect of the open source Firedrake project for finite element simulation.", "public_name": "Lawrence Mitchell", "guid": "346aff5b-2962-500f-b7cc-c3caa29e16c2", "url": "https://cfp.pydata.org/london2025/speaker/M9CR7H/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/HFWMHG/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/HFWMHG/", "attachments": []}, {"guid": "9e253444-da08-54ce-974e-1217c95c642d", "code": "UTCBUH", "id": 77364, "logo": "https://cfp.pydata.org/media/london2025/submissions/UTCBUH/PyData_London_-_AI_fairne_moZkCQt.png", "date": "2025-06-06T13:30:00+01:00", "start": "13:30", "duration": "01:30", "room": "Grand Hall", "slug": "london2025-77364-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models", "url": "https://cfp.pydata.org/london2025/talk/UTCBUH/", "title": "How To Measure And Mitigate Unfair Bias in Machine Learning Models", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "In this 90-minute workshop, machine learning engineers and data scientists will learn practical techniques for identifying and mitigating age bias in AI-driven hiring systems. We\u2019ll explore fairness metrics like statistical parity, counterfactual fairness, and equalized odds, and demonstrate how tools such as [Fairlearn](https://fairlearn.org/), [Aequitas](http://aequitas.dssg.io/), and [AI Fairness 360](https://ai-fairness-360.org/) can be used to monitor and improve model fairness. Through hands-on exercises, participants will walk away with the skills to evaluate and de-bias models in high-risk areas like recruitment.", "description": "AI tools used in hiring can unintentionally perpetuate discrimination in protected characteristics such as age, gender and ethnicity, leading to significant real-world harm. 
This workshop provides a practical, hands-on approach to addressing biases in machine learning models, using the example of AI-powered hiring tools. You\u2019ll train a neural network on biased datasets, evaluate fairness metrics, and work with state-of-the-art tools like [Fairlearn](https://fairlearn.org/) and [Google\u2019s What-If Tool](https://pair-code.github.io/what-if-tool/) to measure and mitigate bias. By the end of the session, participants will be equipped with the knowledge and tools to tackle bias in their own projects and ensure fairer AI systems.", "recording_license": "", "do_not_record": false, "persons": [{"code": "MM3RDV", "name": "John Sandall", "avatar": "https://cfp.pydata.org/media/avatars/MM3RDV_zNZRLPX.webp", "biography": "John Sandall is the CEO and Principal Data Scientist at **[Coefficient](https://coefficient.ai)**.\n\nHis experience in data science and software engineering spans multiple industries and applications, and his passion for the power of data extends far beyond his work for Coefficient\u2019s clients. In April 2017 he created SixFifty in order to predict the UK General Election using open data and advanced modelling techniques. Previous experience includes Lead Data Scientist at YPlan, business analytics at Apple, genomics research at Imperial College London, building an ed-tech startup at Knodium, developing strategy & technological infrastructure for international non-profit startup STIR Education, and losing sleep to many hackathons along the way.\n\nJohn is also a co-organiser of PyData London, co-founded Humble Data in 2019 to promote diversity in data science through a programme of free bootcamps, and in 2020 was a Committee Chair for the PyData Global Conference. 
He is currently a Fellow of Newspeak House with interests in open data, AI ethics and promoting diversity in tech.", "public_name": "John Sandall", "guid": "51d6eaf7-6101-5be8-910e-bd562fbcab9d", "url": "https://cfp.pydata.org/london2025/speaker/MM3RDV/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/UTCBUH/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/UTCBUH/", "attachments": []}, {"guid": "94f98b45-83f6-5666-a82a-b4da3fdfe84b", "code": "J83ZYE", "id": 77442, "logo": null, "date": "2025-06-06T15:30:00+01:00", "start": "15:30", "duration": "01:30", "room": "Grand Hall", "slug": "london2025-77442-building-your-own-vertical-agent-with-ag2-agentos", "url": "https://cfp.pydata.org/london2025/talk/J83ZYE/", "title": "Building your own vertical agent with AG2 AgentOS", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "In this tutorial, we will cover basic and advanced agentic design patterns in AG2 and we will go through practical implementations to demonstrate AI agents in action.", "description": "Majority of knowledge work nowadays requires comprehensive, integrated research in order to uncover deep insights. While existing technologies have advanced, the data deluge and fragmented complex systems mean extensive resources and specialised teams are still necessary. AG2 AgentOS changes this paradigm by seamlessly enabling multi-agent systems to solve complex tasks and aggregate diverse data sources to achieve outcomes that would usually take even experts a lot of time.\n\nIn this session, we will cover:\n1. Design patterns and practical implementations to demonstrate AI agents in action such as\n- Customized GroupChat\n- Code execution\n- Deep Research Agent\n- Swarm\n- Tool using\n- Async chats\n- Dynamic instructions\n- Realtime Agent\n- GraphRAG\n- Structured Output\n2. 
The anatomy of a Vertical AI agent application and how to seamlessly integrate multiple agents powered by models from OpenAI, Anthropic, Gemini, and open-weight providers, and a diverse range of tools to build your own vertical agent. \n3. We will utilise components we\u2019ve learned to collect information from the internet, connect to a data room, and create various modelling functions to replicate analysis done in a technical and commercial deep dive in a startup.\n4. Explain how to contribute to the thriving AI agent ecosystem.\n\nTarget industries and use cases: \n1. Industries that require deep research, such as finance, healthcare, science & engineering. Research/Analysis/Science use cases: Deep Research Agent, SciAgents, Financial Analysis, AutoML Agent.\n2. Industries involving customer support, such as e-commerce, education, social media. Customer-oriented use cases: Travel Planner, Order Management, Realtime ToDo Assistant, Email Management, -Social Media Management, Youth Helper.\n3. Industries involving heavy software design & development, such as gaming, web, data engineering. Software-oriented use cases: Game Design Agents, Web Agent, Software Testing Agent\n\nAt the end of the tutorial, the attendees would gain a better understanding of agent-oriented programming concepts and how to reach production-readiness 10x faster. Through the examples given, they will be able to construct effective multi-agent systems to solve complex tasks. They will have reusable building blocks to customize for their own vertical agent.", "recording_license": "", "do_not_record": false, "persons": [{"code": "USH9LJ", "name": "Tim Santos", "avatar": null, "biography": "I'm leading Graphcore\u2019s Cloud Solutions ecosystem helping AI & ML software development teams build AI products and deploy ML capabilities in production. I've gained experience taking AI applications from the research lab to large scale deployments. 
Also a Deeplearning.ai ambassador and founder of AI Hive community, startup advisor and visiting fellow at zinc.vc.", "public_name": "Tim Santos", "guid": "31fd1794-08b1-575f-a96f-d4246418ec32", "url": "https://cfp.pydata.org/london2025/speaker/USH9LJ/"}, {"code": "DCFZR9", "name": "Chi Wang", "avatar": null, "biography": "Chi Wang is founder of AutoGen (now AG2), the open-source AgentOS to support agentic AI, and its parent open-source project FLAML, a fast library for AutoML & tuning. Chi runs the AG2 community with 20K+ members. He has received multiple awards such as best paper of ICLR'24 LLM Agents Workshop, Open100, and SIGKDD Data Science/Data Mining PhD Dissertation Award. He has 15+ years of research experience in Computer Science from Google DeepMind, Microsoft Research, Meta, UIUC and Tsinghua.", "public_name": "Chi Wang", "guid": "0c3774e7-a6a0-5e7d-a2ca-e805c1fb506a", "url": "https://cfp.pydata.org/london2025/speaker/DCFZR9/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/J83ZYE/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/J83ZYE/", "attachments": []}], "Doddington Forum": [{"guid": "55d2ad5d-c386-5878-9c51-de9329cb8ff4", "code": "T9KEHN", "id": 77262, "logo": null, "date": "2025-06-06T09:00:00+01:00", "start": "09:00", "duration": "01:30", "room": "Doddington Forum", "slug": "london2025-77262-introduction-to-bayesian-time-series-analysis-with-pymc", "url": "https://cfp.pydata.org/london2025/talk/T9KEHN/", "title": "Introduction to Bayesian Time Series Analysis with PyMC", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Time series data is ubiquitous, from stock market prices and weather patterns to disease outbreaks and sports outcomes.  Accurately modeling these data and generating useful predictions requires specialized techniques due to the unique characteristics of time series data. 
This tutorial provides a practical introduction to Bayesian time series analysis using PyMC, a powerful probabilistic programming library in Python. Participants will learn how to build, evaluate, and interpret various Bayesian time series models, including ARIMA models, dynamic linear models, and stochastic volatility models.  We'll emphasize practical application, covering data preprocessing, model selection, diagnostics, and forecasting, empowering attendees to tackle real-world time series problems with confidence.", "description": "Traditional time series methods often struggle with complex patterns, uncertainty quantification, and incorporating prior knowledge. Bayesian methods offer a robust alternative, providing a flexible framework for handling these challenges. This tutorial will equip participants with the skills to leverage the power of Bayesian time series analysis using PyMC.\n\nThis tutorial is designed for data scientists, analysts, and researchers with some familiarity with Python and basic statistical concepts. Prior experience with time series analysis is helpful but not strictly required.  A basic understanding of probability distributions and Bayesian inference will be beneficial, but we will review key concepts.  
Participants should be comfortable working with Jupyter notebooks.\n\nBy the end of this tutorial, participants will be able to:\n\n- Understand the advantages of Bayesian time series analysis.\n- Implement various Bayesian time series models using PyMC.\n- Preprocess time series data for Bayesian modeling.\n- Perform model selection and comparison.\n- Evaluate model fit and diagnose potential issues.\n- Generate forecasts and interpret results.\n- Apply Bayesian time series methods to real-world datasets.", "recording_license": "", "do_not_record": false, "persons": [{"code": "MZZ8YC", "name": "Chris Fonnesbeck", "avatar": "https://cfp.pydata.org/media/avatars/MZZ8YC_Toi8z8j.webp", "biography": "Chris is a Principal Quantitative Analyst at PyMC Labs and an Adjunct Associate Professor at the Vanderbilt University Medical Center, with 20 years of experience as a data scientist in academia, industry, and government. He is interested in computational statistics, machine learning, Bayesian methods, and applied decision analysis. He hails from Vancouver, Canada and received his Ph.D. 
from the University of Georgia.\u200b\u200b", "public_name": "Chris Fonnesbeck", "guid": "1820c9f2-1b76-5976-b9bd-1d66269268af", "url": "https://cfp.pydata.org/london2025/speaker/MZZ8YC/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/T9KEHN/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/T9KEHN/", "attachments": []}, {"guid": "84a6d3f1-52e9-5957-9c53-98494169e250", "code": "U7VZKA", "id": 77573, "logo": "https://cfp.pydata.org/media/london2025/submissions/U7VZKA/ChatGPT_Image_Apr_29_2025_p9pFmJO.png", "date": "2025-06-06T11:00:00+01:00", "start": "11:00", "duration": "01:30", "room": "Doddington Forum", "slug": "london2025-77573-forecasting-weather-using-time-series-ml", "url": "https://cfp.pydata.org/london2025/talk/U7VZKA/", "title": "Forecasting Weather using Time Series ML", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "This hands-on workshop covers how to use open source ML models like LSTMs and TimeSeries LLM's, with Python to try to forecast weather patterns, with best practices for data preparation and real time predictions.", "description": "Weather patterns are notoriously challenging to predict, typically requiring sophisticated satellite technology and advanced modeling techniques. However, recent advancements in deep learning for time series forecasting offer powerful new methods to tackle this complexity.\n\nIn this hands-on workshop, you will learn to try to forecast weather conditions for the next six months using Python, Google Colab, InfluxDB and popular libraries like Neural Prophet and state of the art Time Series LLMs. Learn the strengths, weaknesses, and common pitfalls of each approach, from classical techniques (ARIMA) to using Transformers. We\u2019ll explore data preprocessing, model training, evaluation, with practical examples and ready-to-use notebooks. 
All code and instructions will be available on GitHub, ensuring you can continue exploring time series forecasting beyond the session.", "recording_license": "", "do_not_record": false, "persons": [{"code": "MMURD9", "name": "Suyash Joshi", "avatar": "https://cfp.pydata.org/media/avatars/MMURD9_dFWfwVI.webp", "biography": "Suyash Joshi is an accomplished engineer and developer advocate at InfluxData, with previous roles at Oracle and RingCentral. Holding a B.S. in Computer Science and an M.A. in Game Design, he merges technical expertise with creativity. He is dedicated to community building, delivering talks & workshops globally while sharing his knowledge and connecting with others.", "public_name": "Suyash Joshi", "guid": "ff3998d4-2f33-5aed-b48d-28b03a765277", "url": "https://cfp.pydata.org/london2025/speaker/MMURD9/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/U7VZKA/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/U7VZKA/", "attachments": []}, {"guid": "d17d867e-5cc5-52b3-bce4-88864e3098e2", "code": "Z3UW79", "id": 77223, "logo": null, "date": "2025-06-06T13:30:00+01:00", "start": "13:30", "duration": "01:30", "room": "Doddington Forum", "slug": "london2025-77223-python-meets-quantum-learn-code-and-simulate", "url": "https://cfp.pydata.org/london2025/talk/Z3UW79/", "title": "Python Meets Quantum: Learn, Code, and Simulate", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "This workshop is designed for Python developers eager to explore the exciting world of quantum computing. Through interactive exercises and practical coding examples, participants will learn how to program quantum computers using Python. No advanced background in quantum mechanics is required - just curiosity and a willingness to dive into cutting-edge technology.", "description": "Workshop Highlights\n\n1. 
Quantum Computing Fundamentals\n\n    - A beginner-friendly introduction to key quantum concepts: qubits, superposition, entanglement, and quantum gates.\n    - Understand why quantum computing is groundbreaking and how it differs from classical computing.\n\n2. Getting Started with Quantum Programming in Python\n\n    - Hands-on setup: installing and configuring Qiskit and other essential libraries.\n    - Build and execute your first quantum circuits.\n\n3. Developing Quantum Programs\n\n    - Create and simulate quantum circuits for fundamental algorithms like the Quantum Fourier Transform and Grover\u2019s search.\n    - Learn how to test quantum programs on simulators before running them on real quantum hardware.\n\nWho Should Attend?\n\nThis workshop is ideal for Python developers, data scientists, and ML practitioners curious about quantum computing. Basic Python knowledge is recommended - no prior experience in quantum physics is needed.", "recording_license": "", "do_not_record": false, "persons": [{"code": "L7KBLH", "name": "Andrea Melloncelli", "avatar": "https://cfp.pydata.org/media/avatars/L7KBLH_g5yOZZh.webp", "biography": "A physicist by education and a lecturer of programming for data science and applied statistics for some Milano universities, I worked as a data scientist to provide data-based business solutions. 
For example, my specialities include numerical optimization, NLP, Time Series analysis, signal analysis, and modelling projects.\n\nI co-founded Apply Quantum (https://applyquantum.ai), specialising in AI, quantum computing, and providing training.", "public_name": "Andrea Melloncelli", "guid": "349d768c-55c0-58e3-84c0-7c777a53f6b8", "url": "https://cfp.pydata.org/london2025/speaker/L7KBLH/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/Z3UW79/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/Z3UW79/", "attachments": []}, {"guid": "116c8dce-6a06-5db1-a2c0-fb5c2757c390", "code": "W7WYMM", "id": 77113, "logo": null, "date": "2025-06-06T15:30:00+01:00", "start": "15:30", "duration": "01:30", "room": "Doddington Forum", "slug": "london2025-77113-hands-on-workshop-on-developing-reinforcement-learning-solutions-with-financial-domain-example-use-cases", "url": "https://cfp.pydata.org/london2025/talk/W7WYMM/", "title": "Hands-on workshop on developing Reinforcement Learning solutions with financial domain example use cases.", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Reinforcement Learning (RL) has emerged as a transformative sub-field in AI/ML, driving breakthroughs in areas ranging from autonomous robotics to personalized recommendation systems. This workshop is designed to serve a broad audience\u2014from beginners eager to grasp foundational RL concepts to practitioners seeking to deepen their technical expertise through applied projects. 
These projects will range from developing simple classical RL game environments to practical financial domain use cases such as using RL sequential decision making for stock trading and asset portfolio optimization scenarios.", "description": "Over the course of this interactive session, participants will embark on a journey that begins with an introduction to the fundamental principles of RL, including Markov Decision Processes, reward structures, and the critical balance between exploration and exploitation. We will then transition into a series of hands-on coding exercises using popular frameworks such as Python\u2019s Gymnasium (formerly referred to as Gym), PyTorch and RL open-source libraries such as Stable-baselines3 and Machin (to name a few). These exercises will enable attendees to implement classic algorithms like Q-learning, SARSA and deep learning algorithms such as actor-critic architectures and  policy gradients in controlled environments.\n\nReal-world case studies and example use cases\u2014ranging from classical simple simulated game environments to realistic decision-making systems in finance (such as stock trading and asset portfolio optimization use cases) - will illustrate how RL methodologies are applied in practice. During this workshop participants will develop and fine-tune RL models, gaining insights into performance evaluation, model tuning, and deployment strategies. Additionally, advanced topics such as deep RL architectures, on-policy and off-policy RL algorithms will be discussed and hacked interactively. \n\nThis workshop aims not only to impart theoretical knowledge but also to empower participants with the practical skills needed to design and deploy effective RL solutions. Join us to explore the dynamic world of reinforcement learning and to enhance your toolkit for solving complex, data-driven challenges. 
All the python libraries/packages, reference papers and data used in this workshop will be open sourced and made available in a Github repo (which will be made available soon).", "recording_license": "", "do_not_record": false, "persons": [{"code": "U8E8AQ", "name": "Ade Idowu", "avatar": "https://cfp.pydata.org/media/avatars/U8E8AQ_5EogayV.webp", "biography": "A lead software engineer and data scientist. Has over 15 years\u2019 experience in the development of software and AI/ML solutions. Pragmatic, analytic problem solver and builder of artificial intelligence solutions for business seeking efficiency and value. A passionate advocate of the development and use of ethical AI in products and services.", "public_name": "Ade Idowu", "guid": "ead9359b-9d00-5e88-9060-d5e4979e5cf6", "url": "https://cfp.pydata.org/london2025/speaker/U8E8AQ/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/W7WYMM/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/W7WYMM/", "attachments": []}], "Hardwick Hub": [{"guid": "1e1db4ad-c016-5fce-977e-11499ca312cf", "code": "V3CWEM", "id": 77234, "logo": null, "date": "2025-06-06T09:00:00+01:00", "start": "09:00", "duration": "01:30", "room": "Hardwick Hub", "slug": "london2025-77234-hands-on-with-apache-iceberg", "url": "https://cfp.pydata.org/london2025/talk/V3CWEM/", "title": "Hands-on with Apache Iceberg", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "You've probably heard the name Apache Iceberg by now. If it wasn't when Databricks reportedly spent 2 billion USD buying Tabular, it might have been when AWS announced S3 Tables built on Iceberg. But do you know what Apache Iceberg actually is? Or how you could start using it today? 
\n\nIn this tutorial, we will walk through an end-to-end example of writing and reading Iceberg data, while taking a few pitstops to demonstrate Iceberg's selling points.", "description": "**This tutorial is aimed at the data engineer who's somewhat familiar with cloud storage solutions such as S3, Azure Blob Storage or Google Cloud Storage. The tutorial will consist of fully-local components running in Docker and Jupyter notebooks. You will be able to replicate the environment locally and play around with it yourself. **\n\nPlease clone https://github.com/andersbogsnes/pydata-london-2025-hands-on-apache-iceberg and run the commands in the README.md before the workshop if possible!\n\nThe goal of this tutorial is to give you an understanding of what Apache Iceberg is and does. \n\nWe will write data in Iceberg format to an object store, taking the opportunity to demonstrate each of Iceberg's selling points. Finally, we will query the data using a variety of query engines to demonstrate the promises of Iceberg's interoperability.\n\n## Outline\n- Introduce some of the concepts needed to understand the why of Apache Iceberg\n  - A brief history of table formats\n  - A discussion of the importance of file formats\n- Introducing the dataset we will be working with\n- Writing data into Iceberg format - what is happening under the hood?\n- Demonstrating the main selling points of Iceberg and why you should care\n  - Schema Evolution\n  - Hidden Partitioning\n  - Time Travel\n  - Data Compaction\n- Querying the data\n  - Duckdb\n  - Polars\n  - Other query engines", "recording_license": "", "do_not_record": false, "persons": [{"code": "NMREM3", "name": "Anders Bogsnes", "avatar": "https://cfp.pydata.org/media/avatars/NMREM3_Glt5hJm.webp", "biography": "Anders is the Head of Investments Engineering at Nordea Asset Management and organizer of Pydata Copenhagen Meetup. 
He has a background as a ML Tech Lead and Python Enabler with an interest in data engineering, ML and ML Engineering. Hailing from Stavanger, Norway, he is currently located in Copenhagen, Denmark", "public_name": "Anders Bogsnes", "guid": "89f72c18-9831-5dea-8dc7-874af0e2533d", "url": "https://cfp.pydata.org/london2025/speaker/NMREM3/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/V3CWEM/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/V3CWEM/", "attachments": []}, {"guid": "60130c54-75a5-5261-b159-a8bc5da3f20b", "code": "BCKCMR", "id": 77400, "logo": "https://cfp.pydata.org/media/london2025/submissions/BCKCMR/Set_your_Python_code_free_Bp6YVh4.png", "date": "2025-06-06T11:00:00+01:00", "start": "11:00", "duration": "01:30", "room": "Hardwick Hub", "slug": "london2025-77400-package-your-python-code-as-a-cli", "url": "https://cfp.pydata.org/london2025/talk/BCKCMR/", "title": "Package Your Python Code as a CLI", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Learn how to transform your Python code into a command-line tool. Jeroen Janssens, author of *[Data Science at the Command Line](https://jeroenjanssens.com/dsatcl)*, guides you through the process of turning your scripts into reusable, executable tools, integrating them into your data workflows and harnessing the power of the Unix command line.", "description": "If you're not sure whether this tutorial is for you, we recommend you watch Jeroen's talk [Embrace the Unix Command Line and Supercharge Your PyData Workflow](https://www.youtube.com/watch?v=siPGvvrfylQ).\n\n\n***Note: This tutorial assumes that you're using macOS or a Linux distribution. 
If you're using Windows, please [install WSL](https://learn.microsoft.com/en-us/windows/wsl/install) or [a suitable Docker image](https://jeroenjanssens.com/dsatcl/chapter-2-getting-started#docker-image).***\n\nAs your Python scripts evolve, turning them into command-line tools offers numerous benefits: reusability, testability, and greater efficiency. The Unix command line is a powerful environment, designed for combining tools, parallel execution, and working with massive data.\n\nThis hands-on tutorial will cover:\n\n- The Unix philosophy and its relevance to data science\n- How to convert Python code into a command-line tool\n    - Preparing your code for reuse\n    - Parsing command-line arguments\n    - Reading from standard input\n    - Making your tool executable and adding help options\n- Best practices for designing command-line interfaces\n- Upgrading from argv to argparse or Typer\n- Self-contained tools with uv\n\nThroughout the tutorial, we\u2019ll develop an actual command-line tool, starting with Python\u2019s standard library and later incorporating additional libraries. This tutorial is ideal for developers and researchers looking to enhance their workflows. No prior Unix knowledge is needed; essential concepts will be covered.\n\n## Resources\n\n- [Presentation](https://docs.google.com/presentation/d/14yhoWSaUf8RzKWQHQ426WAXbRXGAmSsMKyiFQrEACFo/edit?usp=sharing)\n- [Code](https://github.com/jeroenjanssens/python-cli-tutorial)", "recording_license": "", "do_not_record": false, "persons": [{"code": "CZ7KA3", "name": "Jeroen Janssens", "avatar": "https://cfp.pydata.org/media/avatars/CZ7KA3_3IQHone.webp", "biography": "Jeroen Janssens, PhD, is a Senior Developer Relations Engineer at Posit, PBC. His expertise lies in visualizing data, implementing machine learning models, and building solutions using Python, R, JavaScript, and Bash. He\u2019s passionate about open source and sharing knowledge. 
He\u2019s the author of [Python Polars: The Definitive Guide (O\u2019Reilly, 2025)](https://polarsguide.com) and [Data Science at the Command Line (O\u2019Reilly, 2021)](https://jeroenjanssens.com/dsatcl). Jeroen holds a PhD in machine learning from Tilburg University and an MSc in artificial intelligence from Maastricht University. He lives with his wife and two kids in Rotterdam, the Netherlands.", "public_name": "Jeroen Janssens", "guid": "8d40143f-008c-52fb-a58b-9380e45c28b4", "url": "https://cfp.pydata.org/london2025/speaker/CZ7KA3/"}, {"code": "E9BCYL", "name": "Thijs Nieuwdorp", "avatar": "https://cfp.pydata.org/media/avatars/E9BCYL_XPr1vr9.webp", "biography": "Thijs Nieuwdorp is the Lead Data Scientist at Xomnia in Amsterdam. His interest in the interaction between human and computer led him to an education in artificial intelligence at the Radboud University, after which he dove straight into the field of data science. At Xomnia he witnessed the birth of Polars as Ritchie Vink started working on it during his employment there and has been using it in his projects ever since. He enjoys figuring out complex data problems, optimizing existing solutions, and putting them to good use by implementing them into business processes. Outside work, Thijs enjoys exploring our world through hiking and traveling and exploring other worlds through books, games, and movies. 
He lives in Amsterdam with his partner, Paula.", "public_name": "Thijs Nieuwdorp", "guid": "83fd1ab1-5439-51dd-99b1-04df81776201", "url": "https://cfp.pydata.org/london2025/speaker/E9BCYL/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/BCKCMR/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/BCKCMR/", "attachments": []}, {"guid": "a84c7061-2a0e-51e8-9380-b7c9e37e6a42", "code": "R3UJN7", "id": 77795, "logo": "https://cfp.pydata.org/media/london2025/submissions/R3UJN7/transformers-inside-out_4_t3IFMKa.png", "date": "2025-06-06T13:30:00+01:00", "start": "13:30", "duration": "01:30", "room": "Hardwick Hub", "slug": "london2025-77795-transformers-inside-out-parts-1-2", "url": "https://cfp.pydata.org/london2025/talk/R3UJN7/", "title": "Transformers Inside Out (Parts 1 & 2)", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Large Language Models like GPT4 are now a key part of the technology landscape, but how do they really work? And can you code them up at home? In this tutorial we'll create a simple GPT and train it on a simplified dataset of children's jokes. We'll work against a new set of transformer encoder flow diagrams that intuitively match the code, and look at visualisations of GPT's internal representations in order to better understand transformers inside out!", "description": "In this tutorial we\u2019ll work step by step through creating a simple GPT model in PyTorch.  We'll use simplified kids jokes to train it and see how its internal representations evolve as it tries to tell (hopefully) funnier and funnier jokes.  Intermediate Python programming skills are assumed for this tutorial, as well as a basic understanding of matrix algebra.  
No familiarity with PyTorch, GPT or LLMs is assumed.\n\nPlease clone https://github.com/karpathy/nanoGPT onto your laptop and follow the README.md instructions to get the dependencies installed `pip install torch numpy transformers datasets tiktoken wandb tqdm` before coming to the session,", "recording_license": "", "do_not_record": false, "persons": [{"code": "MZ3Y7X", "name": "Sam Joseph", "avatar": "https://cfp.pydata.org/media/avatars/MZ3Y7X_HfJrXt3.webp", "biography": "I am currently the lead AI developer at Qualis Flow, a company that is using the latest AI tech to help decarbonise the construction industry. Previously I was the CTO of NeuroGrid Ltd., a software consultancy firm providing data science and software engineering services. Before that I was a CoFounder of AgileVentures, where as the CTO we supported multiple open source international charity projects. Further back I was Head of Education and Engineering at the Makers Academy bootcamp, following many years as Associate Professor in Computer Science at Hawaii Pacific University, where I taught courses on AI, mobile, games and software engineering.  It all started with a Ph.D. 
in Machine Learning from the University of Edinburgh.", "public_name": "Sam Joseph", "guid": "5365fa19-af20-5ecf-bd7f-e6d4ea783f7c", "url": "https://cfp.pydata.org/london2025/speaker/MZ3Y7X/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/R3UJN7/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/R3UJN7/", "attachments": []}, {"guid": "36bb6c7f-4974-5f2c-9d5d-9f0a3d942ab0", "code": "PRDCGC", "id": 77799, "logo": null, "date": "2025-06-06T15:30:00+01:00", "start": "15:30", "duration": "01:30", "room": "Hardwick Hub", "slug": "london2025-77799-graph-theory-for-multi-agent-integration-showcase-clinical-use-cases", "url": "https://cfp.pydata.org/london2025/talk/PRDCGC/", "title": "Graph Theory for Multi-Agent Integration: Showcase Clinical Use Cases", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Graph theory is a well-known concept for algorithms and can be used to orchestrate the building of multi-model pipelines. By translating tasks and dependencies into a Directed Acyclic Graph, we can orchestrate diverse AI models, including NLP, vision, and recommendation capabilities. This tutorial provides a step-by-step approach to designing graph-based AI model pipelines, focusing on clinical use cases from the field.", "description": "I will start by providing an introduction to orchestrating multiple models in a single workflow and explaining why conventional linear pipelines fail to meet complex tasks. Next, we\u2019ll outline how graph theory addresses clinical tasks such as patient document workflow, starting from doctor notes, blood results analysis, and discharge letters. Finally, we will discuss how to scale the concept of multi-model integration in any field. 
\nThe tutorial will include live code demos, I will provide a GitHub repository with the tutorial code.", "recording_license": "", "do_not_record": false, "persons": [{"code": "E338FM", "name": "Ahmad Albarqawi", "avatar": "https://cfp.pydata.org/media/avatars/E338FM_CbZN93J.webp", "biography": "Ahmad is a data scientist with a Master from Illinois at Urbana-Champaign. He worked on a study to accelerate clinical tasks using language models and founded MedWrite AI company.\nAhmad is an active contributor to GitHub and has published open-source projects adopted by thousands of developers. He also writes articles about machine learning in various outlets to bridge the gap between research and practical applications.", "public_name": "Ahmad Albarqawi", "guid": "0f3c12b7-476d-5e2f-a002-fcee55cf74e8", "url": "https://cfp.pydata.org/london2025/speaker/E338FM/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/PRDCGC/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/PRDCGC/", "attachments": []}]}}, {"index": 2, "date": "2025-06-07", "day_start": "2025-06-07T04:00:00+01:00", "day_end": "2025-06-08T03:59:00+01:00", "rooms": {"Grand Hall": [{"guid": "4e47d344-9147-5b7e-b92c-59bac9270a52", "code": "ZLTHE9", "id": 77023, "logo": null, "date": "2025-06-07T09:00:00+01:00", "start": "09:00", "duration": "00:55", "room": "Grand Hall", "slug": "london2025-77023-opening-notes-keynote-keep-calm-and-data-on-being-a-data-science-practitioner-in-the-era-of-ai-proliferation", "url": "https://cfp.pydata.org/london2025/talk/ZLTHE9/", "title": "Opening Notes & Keynote: Keep Calm and Data On: Being a data science practitioner in the era of AI proliferation", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Since the end of 2022, the AI space has reached unprecedented velocity, scale and proliferation. 
When it seems like everyone (and their dog) is talking about AI, how should those of us who've been working in Machine Learning, Data Science (and AI) as domain experts look to navigate the conversation? In this talk, Leanne will aim to shine a light on the impact the AI arms race is having on our field, the reality of what it means to be a practitioner and some principles to stick by to help traverse what may appear to be a time of panic.", "description": "", "recording_license": "", "do_not_record": false, "persons": [{"code": "XCGHSJ", "name": "Leanne Fitzpatrick", "avatar": "https://cfp.pydata.org/media/avatars/XCGHSJ_S67zxZZ.webp", "biography": "Leanne is Director of Data Science & AI at the Financial Times and is a passionate, experienced data leader having built and developed empowered data science and analytics teams for a variety of businesses; from startups to large organisations. Leanne is in her element when developing and implementing strategic, technical and cultural solutions to getting data & AI capabilities into the operational ecosystem. She is an active part of the data and technology community, sharing innovation and insights to encourage best practice, and has held various roles as an Advisory Panel Board Member for MSc & PhD Data Science & AI Programmes. 
Outside of all things data you can generally find Leanne chasing after her toddler and/or her dog, enjoying the latest sci-fi & fantasy books, and engaging in her latest crafting project.", "public_name": "Leanne Fitzpatrick", "guid": "52e5a04c-6195-53c0-aad2-9375a7b4ee38", "url": "https://cfp.pydata.org/london2025/speaker/XCGHSJ/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/ZLTHE9/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/ZLTHE9/", "attachments": []}, {"guid": "b5e15a31-3ad8-5890-bda8-c9c59d6c1cc9", "code": "EJWBPU", "id": 77501, "logo": null, "date": "2025-06-07T10:20:00+01:00", "start": "10:20", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77501-multi-task-learning-for-fraud-detection-from-trees-to-mlps", "url": "https://cfp.pydata.org/london2025/talk/EJWBPU/", "title": "Multi-Task Learning for Fraud detection: From Trees to MLPs", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "This talk will present Monzo's exploration of multi-task deep learning to enhance our real-time fraud detection systems. I will outline the challenges of card fraud detection, and explain the limitations of traditional gradient boosted decision tree models in terms of generalisation to rare fraud subtypes. This will motivate the use of multi-task learning, which leverages shared dense representations across fraud sub-tasks. By consolidating multiple specialist learners into a single model, we observe improved performance on less prevalent fraud types, leading to better generalisability, scalability, and robustness. I will also share results from testing multi-task models within our fraud detection infrastructure.", "description": "Fraud detection is a complex problem due to the constant evolution of fraudulent behaviour, significant data imbalance, and the requirement for real-time decision-making. 
Accurate detection of fraud and financial crime is crucial for protecting customers and maintaining trust in the banking system. Traditional fraud detection often relies on binary classification models using tree-based algorithms. While these models offer good predictive performance and scalability, they can struggle to capture shared information across different types of fraud. This often results in the need for multiple specialist models, each requiring individual maintenance and retraining.\nMulti-task learning, a deep learning approach, offers a potential solution by exploiting the commonalities between related fraud problems to improve overall prediction accuracy. Multi-task learning is particularly relevant where multiple prediction targets share underlying patterns. In fraud, different sub-types (e.g., identity theft, account takeover, coercion) frequently exhibit overlapping characteristics. A model trained on multiple signals simultaneously may be better at identifying subtle patterns that individual models might miss. Our hypothesis is that this should lead to increased generalisation, allowing multi-task models to adapt more effectively to new fraud patterns and reduce maintenance overhead.\nIn this talk, I will detail how we have tested this hypothesis at Monzo by applying multi-task learning to the problem of unauthorized card fraud. 
I will discuss the models we developed and the results we have observed in controlled offline settings.", "recording_license": "", "do_not_record": false, "persons": [{"code": "B9REND", "name": "Callum Court", "avatar": null, "biography": null, "public_name": "Callum Court", "guid": "02d45650-1bab-5d92-a2f1-36cb88bbf7ab", "url": "https://cfp.pydata.org/london2025/speaker/B9REND/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/EJWBPU/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/EJWBPU/", "attachments": []}, {"guid": "20d69cba-58ea-5835-9a19-337828891b16", "code": "Q37AUM", "id": 77056, "logo": null, "date": "2025-06-07T11:05:00+01:00", "start": "11:05", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77056-ai-agents-testing-how-to-evaluate-the-unpredictable", "url": "https://cfp.pydata.org/london2025/talk/Q37AUM/", "title": "AI agents testing: How to evaluate the unpredictable", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "AI agents and multi-step workflows are powerful, but testing them can be tricky. This talk explores practical ways to test these complex systems \u2014 like running multi-step simulations, checking tool calls, and using LLMs for evaluation. You'll also learn how to prioritize what to test and set up session-level evaluations with open-source tools.", "description": "AI agents and multi-step AI workflows are incredibly powerful \u2014 but they can also be risky to deploy and even scarier to change. You don\u2019t want your users to be the ones finding the bugs, but it's often not clear how to test such complex systems in advance. Traditional unit tests and ML evaluation methods don\u2019t really work when interactions unfold unpredictably across an entire session.\n\nIn this talk, we\u2019ll break down practical ways to test compound AI systems, including chatbots and AI agents. 
We'll cover:\n- Strategies for testing complex systems\n- Specific approaches, from testing the correctness of tool calls to running multi-step simulations.\n- How to automate evaluation using both LLM-as-a-judge and deterministic checks.\n- How to prioritize testing, balancing edge cases, adversarial scenarios, and core user experiences.\n\nWe'll also share how you can configure and run session-level evaluation using open-source tools.", "recording_license": "", "do_not_record": false, "persons": [{"code": "VHQ7QV", "name": "Emeli Dral", "avatar": "https://cfp.pydata.org/media/avatars/VHQ7QV_XPhCZwP.webp", "biography": "Emeli Dral is a Co-founder and CTO at Evidently AI, a startup developing open-source tools to evaluate, test, and monitor the performance of AI systems.\n\nEarlier, she co-founded an industrial AI startup and served as the Chief Data Scientist at Yandex Data Factory. She led over 50 applied ML projects for various industries - from banking to manufacturing. Emeli is a data science lecturer at Harbour.Space University, and a co-author of the Machine Learning and Data Analysis curriculum at Coursera with over 100,000 students.", "public_name": "Emeli Dral", "guid": "bec2c445-3c38-55e7-870a-4ee6733effa9", "url": "https://cfp.pydata.org/london2025/speaker/VHQ7QV/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/Q37AUM/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/Q37AUM/", "attachments": []}, {"guid": "71d6c80e-46d4-5caa-aff0-ffca8e5e9896", "code": "ZSLNXD", "id": 77299, "logo": null, "date": "2025-06-07T11:50:00+01:00", "start": "11:50", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77299-bringing-stories-to-life-with-ai-data-streaming-and-generative-agents", "url": "https://cfp.pydata.org/london2025/talk/ZSLNXD/", "title": "Bringing stories to life with AI, data streaming and generative agents", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Explore how 
AI-powered Generative Agents can evolve in real time using live data streams. Inspired by Stanford's 'Generative Agents' paper, this session dives into building dynamic, AI-driven worlds with Apache Kafka, Flink, and Iceberg - plus LLMs, RAG, and Python. Demos and practical examples included!", "description": "Storytelling has always been a way to connect and imagine new worlds. Now, with Generative Agents - AI-powered characters that can think, act, and adapt - we can take storytelling to a whole new level. But what if these agents could change and grow in real time, driven by live data streams?\n\nInspired by the Stanford paper \"Generative Agents: Interactive Simulacra of Human Behavior\", this session explores how to build dynamic, AI-driven worlds using Apache Kafka, Apache Flink, and Apache Iceberg. We'll use a Large Language Model to power conversation and agent decision-making, integrate Retrieval-Augmented Generation (RAG) for memory storage and retrieval, and use Python to tie it all together. Along the way, we\u2019ll examine different approaches for data processing, storage, and analysis.\n\nBy the end, you\u2019ll see how data streaming and AI can work together to create lively, evolving virtual communities. Whether you\u2019re into gaming, simulations, research or just exploring what\u2019s possible, this session will give you ideas for building something amazing.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LLBXBT", "name": "Olena Kutsenko", "avatar": "https://cfp.pydata.org/media/avatars/LLBXBT_Q3Gez1v.webp", "biography": "Olena is a Staff Developer Advocate at Confluent and a recognized expert in data streaming and analytics. 
With two decades of experience in software engineering, she has built mission-critical applications, led high-performing teams, and driven large-scale technology adoption at industry leaders like Nokia, HERE Technologies, AWS, and Aiven.\n\nA passionate advocate for real-time data processing and AI-driven applications, Olena empowers developers and organizations to use the power of streaming data. She is an AWS Community Builder, a dedicated mentor, and a volunteer instructor at a nonprofit tech school, helping to shape the next generation of engineers.\n\nAs an international speaker and thought leader, Olena regularly presents at top global conferences, sharing deep technical insights and hands-on expertise. Whether through her talks, workshops, or content, she is committed to making complex technologies accessible and inspiring innovation in the developer community.", "public_name": "Olena Kutsenko", "guid": "aa43d05d-1405-5f91-a0a4-9845c7b38f2c", "url": "https://cfp.pydata.org/london2025/speaker/LLBXBT/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/ZSLNXD/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/ZSLNXD/", "attachments": []}, {"guid": "4cf339d6-0a79-5a55-9e64-35301d02f74c", "code": "DSAQW9", "id": 77057, "logo": null, "date": "2025-06-07T13:40:00+01:00", "start": "13:40", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77057-keynote-from-next-token-prediction-to-reasoning-and-beyond", "url": "https://cfp.pydata.org/london2025/talk/DSAQW9/", "title": "Keynote- From Next Token Prediction to Reasoning and Beyond", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Large Language Models (LLMs) have grown into prominence as some of the most popular technological artifacts of the day. This talk will provide a highly accessible and visual overview of LLM concepts relevant to today's data professionals. 
This includes looking at present-day Transformer architectures, tokenizers, reward models, reasoning LLMs, agentic trajectories, and the various training stages of a large language model including next-word prediction, instruction-tuning, preference-tuning, and reinforcement learning.", "description": "Saturday at 13:40 in the Grand Hall!", "recording_license": "", "do_not_record": false, "persons": [{"code": "3C9ELJ", "name": "Jay Alammar", "avatar": "https://cfp.pydata.org/media/avatars/3C9ELJ_Jz0mfNJ.webp", "biography": "Jay Alammar is co-author of Hands-On Large Language Models, published by O'Reilly Media, and Director and Engineering Fellow at Cohere (a pioneering creator of large language models).\n \nThrough his popular AI/ML blog, Jay has helped millions of researchers and engineers visually understand machine learning tools and concepts (e.g., The Illustrated Transformers, BERT, DeepSeek-R1, and others).", "public_name": "Jay Alammar", "guid": "1d514999-e229-5d72-b9e6-94093ea86d27", "url": "https://cfp.pydata.org/london2025/speaker/3C9ELJ/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/DSAQW9/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/DSAQW9/", "attachments": []}, {"guid": "df389d30-c2cf-5f6b-8090-32266c5cdd61", "code": "RMQUDE", "id": 77093, "logo": null, "date": "2025-06-07T14:45:00+01:00", "start": "14:45", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77093-conquering-pdfs-document-understanding-beyond-plain-text", "url": "https://cfp.pydata.org/london2025/talk/RMQUDE/", "title": "Conquering PDFs: document understanding beyond plain text", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "NLP and data science could be so easy if all of our data came as clean and plain text. But in practice, a lot of it is hidden away in PDFs, Word documents, scans and other formats that have been a nightmare to work with. 
In this talk, I'll present a new and modular approach for building robust document understanding systems, using state-of-the-art models and the awesome Python ecosystem. I'll show you how you can go from PDFs to structured data and even build fully custom information extraction pipelines for your specific use case.", "description": "For the practical examples, I'll be using spaCy, and the new Docling library and layout analysis models. I'll also cover Optical Character Recognition (OCR) for image-based text, how to convert tabular data to pandas DataFrames, and strategies for creating training and evaluation data for information extraction tasks like text classification and entity recognition using PDFs and other documents as inputs.", "recording_license": "", "do_not_record": false, "persons": [{"code": "FZKG9N", "name": "Ines Montani", "avatar": "https://cfp.pydata.org/media/avatars/FZKG9N_7il65fA.webp", "biography": "Ines Montani is a developer specializing in tools for AI and NLP technology. 
She\u2019s the co-founder and CEO of Explosion and a core developer of spaCy, a popular open-source library for Natural Language Processing in Python, and Prodigy, a modern annotation tool for creating training data for machine learning models.", "public_name": "Ines Montani", "guid": "974b319d-258e-5977-a654-ce3a696a831d", "url": "https://cfp.pydata.org/london2025/speaker/FZKG9N/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/RMQUDE/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/RMQUDE/", "attachments": []}, {"guid": "e0ff38b7-1a51-55e7-b101-751ddaf83265", "code": "3QAKDE", "id": 77793, "logo": null, "date": "2025-06-07T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77793-not-another-llm-talk-practical-lessons-from-building-a-real-world-adverse-media-pipeline", "url": "https://cfp.pydata.org/london2025/talk/3QAKDE/", "title": "Not Another LLM Talk\u2026 Practical Lessons from Building a Real-World Adverse Media Pipeline", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "LLMs are magical\u2014until they aren\u2019t. Extracting adverse media entities might sound straightforward, but throw in hallucinations, inconsistent outputs, and skyrocketing API costs, and suddenly, that sleek prototype turns into a production nightmare.\n\nOur adverse media pipeline monitors over 1 million articles a day, sifting through vast amounts of news to identify reports of crimes linked to financial bad actors, money laundering, and other risks. 
Thanks to GenAI and LLMs, we can tackle this problem in new ways\u2014but deploying these models at scale comes with its own set of challenges: ensuring accuracy, controlling costs, and staying compliant in highly regulated industries.\n\nIn this talk, we\u2019ll take you inside our journey to production, exploring the real-world challenges we faced through the lens of key personas: Cautious Claire, the compliance officer who doesn\u2019t trust black-box AI; Magic Mike, the sales lead who thinks LLMs can do anything; Just-Fine-Tune Jenny, the PM convinced fine-tuning will solve everything; Reinventing Ryan, the engineer reinventing the wheel; and Paranoid Pete, the security lead fearing data leaks.\n\nExpect practical insights, cautionary tales, and real-world lessons on making LLMs reliable, scalable, and production-ready. If you've ever wondered why your pipeline works perfectly in a Jupyter notebook but falls apart in production, this talk is for you.", "description": "We\u2019ve all seen the hype\u2014LLMs are transforming workflows, revolutionising automation, and changing how we extract insights from text. But when it comes to real-world production systems, things get messy fast.\n\nOur adverse media pipeline processes over 1 million news articles a day, scanning for reports of crimes linked to financial bad actors, money laundering, and other regulatory risks. With GenAI and LLMs, we have powerful new tools to automate entity extraction and risk detection. However, deploying these models at scale brings a whole new set of challenges:\n\n\ud83d\udee0\ufe0f Breaking Down the Problem: Why structuring tasks into modular prompts and chaining responses is key to accuracy.\n\ud83d\udcb0 Cost vs. Performance Trade-offs: How different prompting strategies and model choices (API-based vs. 
fine-tuned local models) impact cost and scalability.\n\ud83e\uddd0 Validation & Governance: From handling hallucinations to dealing with sensitive data while staying within regulatory frameworks.\n\ud83e\uddf0 Open Source & Practical Tooling: How to build reliable, cost-efficient LLM pipelines using tools in the Python ecosystem\n\nTo illustrate the real-world challenges of getting an LLM pipeline into production, we\u2019ll introduce a cast of personas that will feel all too familiar:\n\n- Cautious Claire \u2013 the compliance officer who doesn\u2019t trust AI black boxes.\n- Magic Mike \u2013 the sales lead who thinks LLMs can do anything.\n- Just-Fine-Tune Jenny \u2013 the product manager convinced fine-tuning will fix everything.\n- Reinventing Ryan \u2013 the engineer determined to build everything from scratch.\n- Paranoid Pete \u2013 the security lead who fears LLMs will leak all the secrets.\n\nThrough their perspectives, we\u2019ll explore the tensions, trade-offs, and hard-won lessons of taking an LLM-powered pipeline from a Jupyter notebook to a production-grade system. Expect practical insights through a real-world case study, and cautionary tales to help you navigate your own deployment challenges.\n\nWho Should Attend?\nThis talk is for ML engineers, data scientists, software engineers, and product managers working with LLMs in production or planning to do so. 
Whether you\u2019re evaluating architectures, struggling with cost control, or trying to balance compliance concerns, you\u2019ll walk away with battle-tested strategies for building scalable, reliable, and regulation-friendly LLM pipelines.", "recording_license": "", "do_not_record": false, "persons": [{"code": "P98FWZ", "name": "Adam Hill", "avatar": "https://cfp.pydata.org/media/avatars/P98FWZ_j3NWKKu.webp", "biography": "Adam is the Interim Director of Data Science at ComplyAdvantage, where he leads a brilliant team tackling financial crime with advanced analytics, large-scale systems, and the latest in generative and agentic AI.\n\nBefore that, he spent eight years in the smart cities space at HAL24K, helping governments and infrastructure providers make better decisions with their data. Along the way, he built and led a team of ten data scientists, and helped launch four spin-out ventures\u2014proving that good data science can move the dial in the real world.\n\nA recovering astrophysicist, Adam spent a decade analysing data from space telescopes in search of new cosmic phenomena. 
He\u2019s since redirected that curiosity toward Earth-based problems.\n\nAdam is an active member of the PyData community, the founder of PyData Southampton, and a long-time volunteer with DataKind UK, supporting charities and NGOs with pro-bono data science.", "public_name": "Adam Hill", "guid": "e6f72fe2-5449-5a2d-a460-68562489732b", "url": "https://cfp.pydata.org/london2025/speaker/P98FWZ/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/3QAKDE/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/3QAKDE/", "attachments": []}, {"guid": "d83cf5c2-20c7-595b-8614-41d596661c17", "code": "XDLFR3", "id": 77086, "logo": null, "date": "2025-06-07T16:15:00+01:00", "start": "16:15", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77086-successful-projects-through-a-bit-of-rebellion", "url": "https://cfp.pydata.org/london2025/talk/XDLFR3/", "title": "Successful Projects through a bit of Rebellion", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "This talk is for leaders who want new techniques to improve their success rates. In the last 15 months I've built a private data science peer mentorship group where we discuss rebellious ideas that improve our ability to make meaningful change in organisations of all sizes.\n\nAs a leader you've no doubt had trouble defining new projects (perhaps you've been asked - \"add ChatGPT!\"), getting buy-in, building support, defining defensible metrics and milestones, hiring, developing your team, dealing with conflict, avoiding overload and ultimately delivering valuable projects that are adopted by the business. 
I'll share advice across all of these areas based on 25 years of personal experience and the topics we've discussed in my leadership community.\n\nYou'll walk away with new ideas, perspectives and references that ought to change how to work with your team and organisation.", "description": "This talk is for leaders who want new techniques to improve their success rates. In the last 15 months I've built a private data science peer mentorship group where we discuss rebellious ideas that improve our ability to make meaningful change in organisations of all sizes.\n\nAs a leader you've no doubt had trouble defining new projects (perhaps you've been asked - \"add ChatGPT!\"), getting buy-in, building support, defining defensible metrics and milestones, hiring, developing your team, dealing with conflict, avoiding overload and ultimately delivering valuable projects that are adopted by the business. I'll share advice across all of these areas based on 25 years of personal experience and the topics we've discussed in my leadership community.\n\nYou'll walk away with new ideas, perspectives and references that ought to change how to work with your team and organisation.", "recording_license": "", "do_not_record": false, "persons": [{"code": "QPPQER", "name": "Ian Ozsvald", "avatar": "https://cfp.pydata.org/media/avatars/QPPQER_PgOYnuf.webp", "biography": "Ian is a Chief Data Scientist, founder of the RebelAI leadership community, has co-founded and built the annual PyDataLondon conference raising $100k+ annually for the open source movement along with the associated 14,000+ member monthly meetup. Using data science he's helped clients find $2M in recoverable fraud, created the core IP which opened funding rounds for automated recruitment start-ups and diagnosed how major media companies can better supply recommendations to viewers. 
He gives conference talks internationally often as keynote speaker and is the author of the bestselling O'Reilly book High Performance Python (3rd edition). He has over 26 years of experience as a senior data science leader, trainer and team coach. For fun he's walked by his high-energy Springer Spaniel, surfs the Cornish coast and drinks fine coffee. Past talks and articles can be found at: \n\n* https://www.linkedin.com/in/ianozsvald/\n* https://ianozsvald.com/\n* https://notanumber.email/\n* https://github.com/ianozsvald/\n* https://twitter.com/ianozsvald", "public_name": "Ian Ozsvald", "guid": "325a0c0d-2fce-5f52-b006-bde853e03734", "url": "https://cfp.pydata.org/london2025/speaker/QPPQER/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/XDLFR3/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/XDLFR3/", "attachments": []}, {"guid": "75e5ee53-a614-539a-8e5e-caafc98b0c88", "code": "SZ89HM", "id": 77061, "logo": null, "date": "2025-06-07T17:00:00+01:00", "start": "17:00", "duration": "01:00", "room": "Grand Hall", "slug": "london2025-77061-pydata-london-2025-happy-hour", "url": "https://cfp.pydata.org/london2025/talk/SZ89HM/", "title": "PyData London 2025 Happy Hour", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Join us for drinks, snacks and networking from 5-6pm.", "description": "Big thank you to our social sponsors NVIDIA and Anaconda!!", "recording_license": "", "do_not_record": true, "persons": [], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/SZ89HM/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/SZ89HM/", "attachments": []}], "Doddington Forum": [{"guid": "246312dd-59b2-5325-9eb0-cba12e5148e1", "code": "HAARJB", "id": 77109, "logo": null, "date": "2025-06-07T10:20:00+01:00", "start": "10:20", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77109-why-you-should-stop-pretending-your-sparse-data-is-dense", "url": 
"https://cfp.pydata.org/london2025/talk/HAARJB/", "title": "Why you should stop pretending your sparse data is dense", "subtitle": "", "track": "Quant Finance Track Sponsored by Man Group", "type": "Talk", "language": "en", "abstract": "Lots of data in the real world has missing values, but historically prevalent data science tools have had limited support for such data. This talk will compare traditional numerical approaches, the more modern alternative Arrow, as well as ArcticDB, the client-side Dataframe database developed at Man Group.", "description": "Data in the real world is complex, and one form that complexity often takes is missing values. In the Dataframe world, this can mean that your data is no longer representable as a nice rectangle of dense values. So what are the options?\n\nPandas has historically dominated the data science ecosystem, and offers a couple of alternatives. Certain datatypes, such as floats, timestamps, and strings, have a \"natural\" representation for missing values (NaN, NaT, and None respectively). Integer types present more of a challenge, as for a given bit-width, all binary values represent legitimate values. Pandas offers SparseArray with a user-defined fill-value. This is memory efficient, but it is still not possible to differentiate between a missing value, and a value that is present and equal to the fill value.\n\nArrow is the modern alternative in-memory Dataframe representation format, and it comes equipped with in-built handling for missing values that do not depend on the column type in any way. However, the Arrow sparse data representation has its own drawbacks in terms of both memory usage and processing speed.\n\nThis talk will compare and contrast, with examples, the above two approaches, along with the more sophisticated approach taken in ArcticDB. 
As a database, ArcticDB faces all of the same challenges as Pandas and Arrow for its in-memory processing, plus the extra consideration of efficiently serialising these data structures to disk.", "recording_license": "", "do_not_record": false, "persons": [{"code": "JFPLBX", "name": "Alex Owens", "avatar": "https://cfp.pydata.org/media/avatars/JFPLBX_GLqYUAT.webp", "biography": "Alex Owens has been working in a combination of Python and C++ for the past 8 years. For the last 3 and a half of those, he has been a senior engineer on the new open-source Dataframe database, ArcticDB, which is backed by long-time Python enthusiasts Man Group and Bloomberg", "public_name": "Alex Owens", "guid": "f428e80f-963b-5da8-9554-49e664845b42", "url": "https://cfp.pydata.org/london2025/speaker/JFPLBX/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/HAARJB/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/HAARJB/", "attachments": []}, {"guid": "0c92bae7-5f50-535d-9f1b-5c7e8691da1c", "code": "9YUDVW", "id": 77730, "logo": null, "date": "2025-06-07T11:05:00+01:00", "start": "11:05", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77730-how-we-unified-feature-engineering-across-data-and-backend-at-monzo", "url": "https://cfp.pydata.org/london2025/talk/9YUDVW/", "title": "How we unified feature engineering across data and backend at Monzo", "subtitle": "", "track": "Quant Finance Track Sponsored by Man Group", "type": "Talk", "language": "en", "abstract": "Deep dive into how Monzo reduced the effort it takes to generate point-in-time correct features for model development and productionise them with realtime streaming using our event-driven architecture.", "description": "Join us for an in-depth exploration of Monzo's approach to feature engineering. This session will jump into the methodologies we use to streamline the creation of point-in-time correct features for model development. 
We will show how these features are transitioned into production environments, using real-time streaming powered by our event-driven architecture. Discover how we overcame challenges, reduced development time, and ensured data accuracy/consistency.", "recording_license": "", "do_not_record": false, "persons": [{"code": "MTFXS3", "name": "Alex Jones", "avatar": "https://cfp.pydata.org/media/avatars/MTFXS3_a2JkJoW.webp", "biography": "By day, Alex heads up the Machine Learning platform at Monzo, tackling exciting challenges in the fintech space. He also enjoys bringing people together as a host for the MLOps Community London, connecting peers and advancing the conversation around production ML.", "public_name": "Alex Jones", "guid": "862d241a-c237-5206-bf95-91b398cb6f8b", "url": "https://cfp.pydata.org/london2025/speaker/MTFXS3/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/9YUDVW/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/9YUDVW/", "attachments": []}, {"guid": "0529f33c-1c55-5761-a14b-5d1cfc88c1df", "code": "G9U7H8", "id": 77282, "logo": null, "date": "2025-06-07T11:50:00+01:00", "start": "11:50", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77282-enhancing-fraud-detection-with-llm-generated-profiles-from-analyst-efficiency-to-model-performance", "url": "https://cfp.pydata.org/london2025/talk/G9U7H8/", "title": "Enhancing Fraud Detection with LLM-Generated Profiles: From Analyst Efficiency to Model Performance", "subtitle": "", "track": "Quant Finance Track Sponsored by Man Group", "type": "Talk", "language": "en", "abstract": "This talk explores how leveraging Large Language Models (LLMs) to generate structured customer profile summaries improved both compliance analyst workflows and fraud scoring models at a financial institution. 
Attendees will learn how embeddings derived from LLM-generated narratives outperformed traditional manual feature engineering and raw text embeddings, offering insights into practical applications of NLP in fraud detection.", "description": "Objective:\n\nFraud detection systems often rely on manually crafted features or text embeddings of unstructured texts, which may miss nuanced patterns in unstructured data. This talk presents a case study where LLM-generated customer profiles\u2014summarising transaction history, documents, interaction history and related profiles\u2014were used to (1) accelerate compliance reviews and (2) extract embeddings that boosted fraud model performance and sped up its development.\n\nOutline:\n* 0-10 mins: Introduction to challenges in fraud detection: manual inefficiencies and limitations of traditional feature engineering.\n* 10-20 mins: Methodology: Designing LLM-generated profiles to unify structured/unstructured data, and embedding extraction.\n* 20-30 mins: Results: How embeddings of the LLM-generated summaries captured contextual relationships (e.g., subtle transaction-document inconsistencies) better than raw text embeddings or manual features, lessons learned, scalability considerations\n\nKey Takeaways:\n* LLMs can transform unstructured data into actionable insights for both human analysts and ML models.\n* Embeddings from LLM-generated summaries may outperform naive text embeddings by capturing synthesized context and reducing noise.\n* Practical strategies to integrate LLMs into existing fraud detection pipelines without disrupting workflows.\n\nWhy It Matters:\nThis approach bridges the gap between unstructured data utilization and interpretable model improvements, offering a scalable approach for institutions implementing LLM-based solutions. \n\nBackground Knowledge:\nBasic understanding of NLP (e.g., embeddings) and supervised learning. 
No advanced LLM expertise is required.\n\nAudience:\nData scientists, ML engineers, and fraud analysts familiar with basic NLP/ML concepts. Ideal for those exploring NLP applications in finance or seeking alternatives to manual feature engineering.", "recording_license": "", "do_not_record": false, "persons": [{"code": "ASWEDK", "name": "Radion Bikmukhamedov", "avatar": null, "biography": "Radion Bikmukhamedov is a Machine Learning Engineer in ANNA Money's Financial Crime Prevention unit, specializing in operationalizing fraud detection systems that safeguard millions of monthly transactions and save thousands of hours of manual labour by automating fraud analysts' tasks. Over 6 years, he's architected NLP and ensemble model pipelines using Python's ML stack paired with MLOps tools (MLflow, DVC, KServe, Feast) to automate financial crime detection at scale.", "public_name": "Radion Bikmukhamedov", "guid": "18a98cf7-5c72-5d89-92de-19c49bc9c316", "url": "https://cfp.pydata.org/london2025/speaker/ASWEDK/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/G9U7H8/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/G9U7H8/", "attachments": []}, {"guid": "810377d0-2d63-5cdf-9477-c952a76d6ac8", "code": "PWHCFA", "id": 77513, "logo": null, "date": "2025-06-07T14:45:00+01:00", "start": "14:45", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77513-tackling-data-challenges-for-scaling-multi-agent-genai-apps-with-python", "url": "https://cfp.pydata.org/london2025/talk/PWHCFA/", "title": "Tackling Data Challenges for Scaling Multi-Agent GenAI Apps with Python", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "The use of multiple Large Language Models (LLMs) working together to perform complex tasks, known as multi-agent systems, has gained significant traction. 
While orchestration frameworks like LangGraph and Semantic Kernel can streamline orchestration and coordination among agents, developing large-scale, production-grade systems can bring a host of data challenges. Issues such as supporting multi-tenancy, preserving transactional integrity and state, and managing reliable asynchronous function calls while scaling efficiently can be difficult to navigate.\n\nLeveraging insights from practical experiences in the Azure Cosmos DB engineering team, this talk will guide you through key considerations and best practices for storing, managing, and leveraging data in multi-agent applications at any scale. You\u2019ll learn how to understand core multi-agent concepts and architectures, manage statefulness and conversation histories, personalize agents through retrieval-augmented generation (RAG), and effectively integrate APIs and function calls.\n\nAimed at developers, architects, and data scientists at all skill levels, this session will show you how to take your multi-agent systems from the lab to full-scale production deployments, ready to solve real-world problems. We\u2019ll also walk through code implementations that can be quickly and easily put into practice, all in Python.", "description": "The use of multiple Large Language Models (LLMs) working together to perform complex tasks, known as multi-agent systems, has gained significant traction. While orchestration frameworks like LangGraph and Semantic Kernel can streamline orchestration and coordination among agents, developing large-scale, production-grade systems can bring a host of data challenges. 
Issues such as supporting multi-tenancy, preserving transactional integrity and state, and managing reliable asynchronous function calls while scaling efficiently can be difficult to navigate.\n\nLeveraging insights from practical experiences in the Azure Cosmos DB engineering team, this talk will guide you through key considerations and best practices for storing, managing, and leveraging data in multi-agent applications at any scale. You\u2019ll learn how to understand core multi-agent concepts and architectures, manage statefulness and conversation histories, personalize agents through retrieval-augmented generation (RAG), and effectively integrate APIs and function calls.\n\nAimed at developers, architects, and data scientists at all skill levels, this session will show you how to take your multi-agent systems from the lab to full-scale production deployments, ready to solve real-world problems. We\u2019ll also walk through code implementations that can be quickly and easily put into practice, all in Python.", "recording_license": "", "do_not_record": false, "persons": [{"code": "AGYUFC", "name": "Theo van Kraay", "avatar": "https://cfp.pydata.org/media/avatars/AGYUFC_wZK7gY7.webp", "biography": "Theo is a Principal Program Manager in the Azure Cosmos DB Engineering Team at Microsoft, currently focused on AI, programmability, and developer experience for Azure Cosmos DB. Over the years he has driven several programs of work in the team, including Apache Cassandra offerings, Java & Python developer ecosystems, high availability, multi-tenancy, and Generative AI developer advocacy. 
He also loves helping customers and partners be successful with the best AI database service on earth!", "public_name": "Theo van Kraay", "guid": "5e2487f8-663d-5b3d-b74c-50487d43e129", "url": "https://cfp.pydata.org/london2025/speaker/AGYUFC/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/PWHCFA/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/PWHCFA/", "attachments": []}, {"guid": "c99b07b2-97cf-5653-a034-974145e24e53", "code": "Q3QERT", "id": 77549, "logo": null, "date": "2025-06-07T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77549-platforms-for-valuable-ai-products-iteration-iteration-iteration", "url": "https://cfp.pydata.org/london2025/talk/Q3QERT/", "title": "Platforms for valuable AI Products: Iteration, iteration, iteration", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "In data science experimentation is vital, the more we can experiment, the more we can learn.\nHowever quick iteration isn't sufficient we also need to be able to easily promote these experiments to production to deliver value. This requires all the stability and reliability of any production system.\nJohn will discuss building platforms that treat iteration as a first class consideration, the role of open source libraries, and balancing trade-offs.", "description": "", "recording_license": "", "do_not_record": false, "persons": [{"code": "NK3PXH", "name": "John Carney", "avatar": "https://cfp.pydata.org/media/avatars/NK3PXH_YDFmj5K.webp", "biography": "John is an Independent Machine Learning & AI Product Consultant based in Manchester. He helps organisations get past the hype and deliver valuable data, and AI products. 
\n\nJohn is also chair of the PyData London conference, and an organiser at PyDataMCR", "public_name": "John Carney", "guid": "3540c0f0-fc6b-5a18-8f27-cdda944f6571", "url": "https://cfp.pydata.org/london2025/speaker/NK3PXH/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/Q3QERT/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/Q3QERT/", "attachments": []}, {"guid": "3ad03167-8ca9-5cf7-855b-8dff928e54e8", "code": "XTU8RH", "id": 77361, "logo": null, "date": "2025-06-07T16:15:00+01:00", "start": "16:15", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77361-networkx-is-fast-now-zero-code-change-acceleration", "url": "https://cfp.pydata.org/london2025/talk/XTU8RH/", "title": "NetworkX is Fast Now: Zero Code Change Acceleration", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Have you ever wondered how to find connections in your data and to gain insights from them? \nCome discover how NetworkX makes this easy (and fast!).\n\nThis talk is broadly divided into two parts. First we will talk about the power of graph analytics and how you can use tools like NetworkX to extract information from your data, and then we will talk about how we made the machinery behind NetworkX work with heterogeneous backends like GraphBLAS (CPU optimized) and cuGraph (GPU optimized).", "description": "### Part I\nNetworkX is the most popular library in Python for graph theory and applied network science thanks to its extensive API and beginner-friendly documentation. NetworkX is used \"everywhere\", because graphs are everywhere. Don't believe me? We surveyed more than 300 Python packages to understand how they use NetworkX in domains ranging from geoscience, neuroscience, genomics, biology, chemistry, quantum computing, text and language, machine learning, causal inference, optimization, and more. 
We will summarize what we learned to help you apply graph analytics to your data.\n\nOnce you start using NetworkX you will soon realize that the pure-Python implementation starts becoming a roadblock to scalable graph analytics.\n\n### Part II\nWhat should you do when your graph data becomes too large or NetworkX becomes too slow? Simple: use an accelerated NetworkX backend!\n\nNetworkX 3.0 added the ability to dispatch to other implementations. This means you can use other highly tuned libraries from NetworkX to achieve up to 100 to 10_000+ times speedup! As \"the API for graphs\", NetworkX now makes it easy to accelerate your graph workflows on CPUs with [GraphBLAS](https://github.com/python-graphblas/graphblas-algorithms) and NVIDIA GPUs with nx-cugraph. Other backends are welcome, and we plan to support distributed graphs soon for extreme scalability \ud83d\ude80\ud83d\ude80\ud83d\ude80\n\n### Outline:\n\n10 mins - Introduction to the world of network data, modeling with NetworkX, and needs of graph data in the world.\n\n10 mins - How do backends work? Trade-offs of using backends\n\n10 mins - Live demos", "recording_license": "", "do_not_record": false, "persons": [{"code": "KAJENT", "name": "Mridul Seth", "avatar": "https://cfp.pydata.org/media/avatars/KAJENT_0n7xcQB.webp", "biography": "Currently I work at European Spallation Source making sure the data munging pipelines reduce the experiment data. I am also on the board of NumFOCUS, and I have been involved with various projects like Scientific Python, NetworkX, Econ-ARK. 
I am broadly interested in the development and maintenance of the open source data & science software ecosystem and I try to help around wherever possible!", "public_name": "Mridul Seth", "guid": "f3f0f989-6081-592d-91c9-035ffd32e4b7", "url": "https://cfp.pydata.org/london2025/speaker/KAJENT/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/XTU8RH/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/XTU8RH/", "attachments": []}], "Hardwick Hub": [{"guid": "b08b4505-fa1e-5301-9a37-38452ff8d87d", "code": "W8VCU7", "id": 77410, "logo": "https://cfp.pydata.org/media/london2025/submissions/W8VCU7/pytorch-and-python-free-t_TXRZrOm.png", "date": "2025-06-07T10:20:00+01:00", "start": "10:20", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77410-parallel-pytorch-inference-with-python-free-threading", "url": "https://cfp.pydata.org/london2025/talk/W8VCU7/", "title": "Parallel PyTorch Inference with Python Free-Threading", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "This talk examines multi-threaded parallel inference on PyTorch models using the new No-GIL, free-threaded version of Python. Using a simple 124M parameter GPT2 model that we train from scratch, we explore the novel new territory unlocked by free-threaded Python: parallel PyTorch model inference, where multiple threads, unimpeded by the Python GIL, attempt to generate text from a transformer-based model in parallel.", "description": "Python 3.13, released in October 2024, is the first version of Python to introduce support for a \u201cno-GIL\u201d free-threaded mode, per PEP-703 Making the Global Interpreter Lock Optional in CPython, unlocking the ability for multiple Python threads to run simultaneously.\n\nThis allows, for the first time since the language\u2019s inception in December 1989, a single Python process to saturate all CPU cores in parallel with pure Python code (i.e. 
not farming out to extension modules written in C, C++, or, more recently, Rust).\n\nThis talk explores what can be done with PyTorch now with the new free-threaded version of Python, specifically focusing on run-time inference on transformer-based generative models.\n\nWe will introduce a free-threaded implementation of an asyncio-based HTTP server that allows for parallel model inference of a GPT2 PyTorch model, scaling up to multiple GPUs with ease, all within a single Python process---this is novel, uncharted territory that is now unlocked thanks to free-threaded Python.", "recording_license": "", "do_not_record": false, "persons": [{"code": "HATRC3", "name": "Micha\u0142 Szo\u0142ucha", "avatar": "https://cfp.pydata.org/media/avatars/HATRC3_mVJCHVF.webp", "biography": "During his work at NVIDIA, Micha\u0142 gained vast experience in Deep Learning Software Development. He tackled challenges in training and inference, ranging from small-scale to large-scale applications, as well as user-facing tasks and highly-optimized benchmarks like MLPerf. 
Micha\u0142 also possesses a deep understanding of data loading problems, having worked as a developer on NVIDIA DALI, the Data Loading Library.", "public_name": "Micha\u0142 Szo\u0142ucha", "guid": "89dc5e78-9b89-54a6-adcb-153e05e7027b", "url": "https://cfp.pydata.org/london2025/speaker/HATRC3/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/W8VCU7/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/W8VCU7/", "attachments": []}, {"guid": "6b2ff6f2-bb6b-584f-9086-0e7bbec02a30", "code": "HPXBZN", "id": 77384, "logo": null, "date": "2025-06-07T11:05:00+01:00", "start": "11:05", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77384-sovereign-data-for-ai-with-python", "url": "https://cfp.pydata.org/london2025/talk/HPXBZN/", "title": "Sovereign Data for AI with Python", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "The only certainty in life is that the pendulum will always swing. Recently, the pendulum has been swinging towards repatriation. However, the infrastructure needed to build and operate AI systems using Python in a sovereign (even air-gapped) environment has changed since the shift towards the cloud. This talk will introduce the infrastructure you need to build and deploy Python applications for AI - from data processing, to model training and LLM fine-tuning at scale to inference at scale.  We will focus on open-source infrastructure including:\na Python library server (Pypi, Conda, etc) and avoiding supply chain attacks\na container registry that works at scale\na S3 storage layer\na database server with a vector index", "description": "The only certainty in life is that the pendulum will always swing. Recently, the pendulum has been swinging towards repatriation. However, the infrastructure needed to build and operate AI systems using Python in a sovereign (even air-gapped) environment has changed since the shift towards the cloud. 
This talk will introduce the infrastructure you need to build and deploy Python applications for AI - from data processing, to model training and LLM fine-tuning at scale to inference at scale.  We will focus on open-source infrastructure including:\na Python library server (Pypi, Conda, etc) and avoiding supply chain attacks\na container registry that works at scale\na S3 storage layer\na database server with a vector index", "recording_license": "", "do_not_record": false, "persons": [{"code": "NXHNQ3", "name": "Lex Avstreikh", "avatar": "https://cfp.pydata.org/media/avatars/NXHNQ3_84JqEba.webp", "biography": "Lex Avstreikh is the Head of Strategy at Hopsworks; a Swedish startup at the forefront of machine learning infrastructure. He focuses on identifying pivotal market trends and product initiatives.", "public_name": "Lex Avstreikh", "guid": "d1560593-ab47-5131-9796-9cce7d4cf5ef", "url": "https://cfp.pydata.org/london2025/speaker/NXHNQ3/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/HPXBZN/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/HPXBZN/", "attachments": []}, {"guid": "83bcd8c2-55a3-56b2-b56d-4ad61b72ab20", "code": "RDVWPC", "id": 77304, "logo": null, "date": "2025-06-07T11:50:00+01:00", "start": "11:50", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77304-cutting-edge-football-analytics-using-polars-keras-and-spektral", "url": "https://cfp.pydata.org/london2025/talk/RDVWPC/", "title": "Cutting Edge Football Analytics using Polars, Keras and Spektral", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Football analytics has rapidly evolved over the past five years, becoming a crucial part of professional and fan discourse. 
While much of the cutting-edge research remains hidden behind the fences of club training grounds, a growing ecosystem of open-source tools now enables anyone to develop advanced football analytics models.\n\nIn this talk, I'll showcase key open-source libraries\u2014Polars for high-performance data processing, Keras for deep learning, and Spektral for Graph Neural Networks (GNNs)\u2014to analyze millions of player coordinates from publicly available high-frequency positional tracking data. I'll demonstrate how these tools can be used to build in-game prediction models and extract advanced football metrics that only the most advanced football clubs currently use.", "description": "Football analytics has become an essential part of the modern game, influencing everything from tactical decisions to player recruitment. However, much of the cutting-edge research remains locked behind club training grounds, making it difficult for those outside the professional sphere to explore advanced analytical techniques. Fortunately, open-source tools have lowered the barrier to entry, enabling analysts, researchers, and enthusiasts to develop sophisticated models using publicly available data.\n\nThis talk will provide a hands-on introduction to building football analytics models with Polars, Keras, and Spektral. We will start by exploring specific open-source football analytics Python libraries (kloppy and mplsoccer) followed by a brief introduction of basic Polars functionality, to efficiently process millions of player and ball coordinates from high-frequency positional tracking data. Next, we will introduce Keras and Spektral for Deep Learning and Graph Neural Networks (GNNs), demonstrating how these tools can be used to develop in-game prediction models and extract advanced football metrics.\n\nAttendees will gain insights into how open-source machine learning techniques can be applied to football analytics, from raw data processing to model deployment. 
The session is suitable for those with a basic understanding of Python and machine learning concepts, but no prior experience with Polars or GNNs is required. Whether you're a data scientist, football analyst, or simply curious about the intersection of AI and sports, this talk will provide an overview of some of the most prominent open-source resources for cutting-edge football research.", "recording_license": "", "do_not_record": false, "persons": [{"code": "MMPZGL", "name": "Joris Bekkers", "avatar": "https://cfp.pydata.org/media/avatars/MMPZGL_UTLFJIl.webp", "biography": "I'm Joris Bekkers, a self-employed football analytics consultant with over 8 years of experience, specializing in research, development and implementation of cutting-edge tools, models and data visualizations. I'm a co-founder of PySport, a non-profit that aims to grow open-source sports analytics. You can find more information about me at www.unravelsports.github.io", "public_name": "Joris Bekkers", "guid": "0dc02a2e-f6a3-5e62-9496-b5c8632deedf", "url": "https://cfp.pydata.org/london2025/speaker/MMPZGL/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/RDVWPC/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/RDVWPC/", "attachments": []}, {"guid": "df0c67f4-0f88-5c45-a406-4897b3ede725", "code": "9K8PHR", "id": 77500, "logo": null, "date": "2025-06-07T14:45:00+01:00", "start": "14:45", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77500-pyscript-python-in-the-browser", "url": "https://cfp.pydata.org/london2025/talk/9K8PHR/", "title": "PyScript - Python in the Browser", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Learn how to write a web app in Python using PyScript, Pyodide, MicroPython, and WASM.", "description": "[PyScript](https://pyscript.net/) is a fast-growing and vibrant open-source platform for Python in the browser. 
Thanks to PyScript, [CPython](https://python.org/) and [MicroPython](https://micropython.org/) run anywhere a browser runs, which is everywhere!\n\nThis talk, by a PyScript contributor, shows the initial steps needed to get PyScript working. It will describe various aspects of Python browser apps, including UI creation, event handling, CSS styling, and calls to an AI to create content. \n\nWe assume you have basic Python skills but know little about Web technologies, such as JavaScript, CSS, or React. This talk will amaze you with how easy it is to write your own Python web app in the browser using PyScript.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GFJRKG", "name": "Chris  Laffra", "avatar": "https://cfp.pydata.org/media/avatars/GFJRKG_dYkXAlM.webp", "biography": "Chris Laffra is a seasoned professional with extensive experience in leadership, communication, and technology. Having worked at leading tech companies like Google and Uber, as well as major financial institutions such as Bank of America and JP Morgan, Chris has built a career focused on fostering effective communication and leadership within engineering teams.\n\nChris is also an accomplished author, with books on communication that cater specifically to engineers, aiming to make them more effective, productive, impactful, and happy in their roles. 
Additionally, Chris has taught numerous day-long masterclasses that delve into these topics, empowering engineers to excel both individually and as part of a team.", "public_name": "Chris  Laffra", "guid": "3a2f9278-5e51-5015-8614-e969ed2463d1", "url": "https://cfp.pydata.org/london2025/speaker/GFJRKG/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/9K8PHR/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/9K8PHR/", "attachments": []}, {"guid": "ca043492-b544-5296-81f6-2936a6e17955", "code": "WPBA9U", "id": 77022, "logo": null, "date": "2025-06-07T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77022-media-mix-modelling-how-we-can-save-company-budget", "url": "https://cfp.pydata.org/london2025/talk/WPBA9U/", "title": "Media Mix Modelling - how we can save company budget?", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "How can engineers empower marketing teams in the post-cookie era? Discover Bayesian Media Mix Modelling (MMM), a robust data science approach to evaluate multi-channel marketing effectiveness. Learn how to implement MMM and take actionable insights back to your company.", "description": "### Bayesian Media Mix Modeling: Empowering Engineers to Transform Marketing Analytics\n\nThe EU Cookie Law and similar regulations have reshaped the digital advertising landscape, creating challenges for marketing specialists accustomed to cookie-based tracking and last-click attribution. However, this challenge is also an opportunity for engineers and data scientists to step in and provide innovative solutions.\n\n**Bayesian Media Mix Modeling (MMM)** offers a powerful way to analyze the effectiveness of marketing campaigns across channels like advertising platforms, social media, and video streaming services\u2014without relying on personal user data. 
This talk is tailored for engineers, data scientists, and analysts who want to help their marketing colleagues navigate these uncertain waters by implementing MMM effectively.\n\nYou don\u2019t need a marketing background for this session\u2014just a solid grasp of classic data science principles and some experience in data engineering. We\u2019ll cover the fundamentals of MMM, including:\n\n#### Here\u2019s what we\u2019ll cover:\n\n1. **What is MMM?**  \n   A clear introduction to Media Mix Modeling, its purpose, and why it\u2019s essential in the post-cookie era.\n\n2. **Library Showdown: Which MMM Tools to Use**  \n   A comparison of popular Python libraries for MMM, highlighting their strengths, weaknesses, and best use cases.\n\n3. **From Inputs to Outputs: What You Need to Know**  \n   We\u2019ll discuss the required data inputs, expected outputs, and how to prepare for challenges when transitioning from theory to practice.\n\n4. **The Real-World Data Problem**  \n   Real-world data rarely resembles the clean examples you see in tutorials. Learn practical strategies to preprocess messy datasets and make your model work in realistic scenarios.\n\n5. **Collaboration with Marketing Teams**  \n   Discover why MMM is not a magic solution that replaces marketing professionals but rather a tool to enhance their decision-making. Learn how to foster effective collaboration between engineers and marketers.\n\n6. 
**Evaluating and Using MMM Daily**  \n   Practical advice on how to evaluate your MMM\u2019s performance, integrate it into daily workflows, and ensure it delivers actionable insights.\n\nBy the end of this session, you\u2019ll have the knowledge and inspiration to empower your organization with a cutting-edge marketing analytics solution\u2014putting engineers at the heart of the decision-making process.", "recording_license": "", "do_not_record": false, "persons": [{"code": "DKNAWB", "name": "Natalia Ziemba\u2011Jankowska", "avatar": null, "biography": "Data Science and Machine Learning Specialist with six years of experience. Previously focused on Computer Vision, Audio Machine Learning, and the implementation of Large Language Models (LLMs). Now, in KraftCode I'm dedicated to helping marketing teams optimise budgets and strategies through Media Mix Modelling.", "public_name": "Natalia Ziemba\u2011Jankowska", "guid": "02c5b352-1563-55c7-b92b-1103feea23ef", "url": "https://cfp.pydata.org/london2025/speaker/DKNAWB/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/WPBA9U/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/WPBA9U/", "attachments": []}, {"guid": "7f1bfe1c-c8a1-5ea5-affb-7e8116e7370f", "code": "DDJWLB", "id": 77458, "logo": null, "date": "2025-06-07T16:15:00+01:00", "start": "16:15", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77458-llm-inference-arithmetics-the-theory-behind-model-serving", "url": "https://cfp.pydata.org/london2025/talk/DDJWLB/", "title": "LLM Inference Arithmetics: the Theory behind Model Serving", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Have you ever asked yourself how parameters for an LLM are counted, or wondered why Gemma 2B is actually closer to a 3B model? You have no clue about what a KV-Cache is? (And, before you ask: no, it's not a Redis fork.) Do you want to find out how much GPU VRAM you need to run your model smoothly? 
\n\nIf your answer to any of these questions was \"yes\", or you have another doubt about inference with LLMs - such as batching, or time-to-first-token - this talk is for you. Well, except for the Redis part.", "description": "The talk will cover the theory necessary to understand how to serve LLMs. The talk covers the math behind transformers inference in an accessible and light way. By the end of the talk, attendees will learn:\n\n1. How to count the parameters in an LLM, especially the ones in the attention layers.\n2. The difference between compute and memory in the context of LLM inference.\n3. That LLM inference is made up of two parts: prefill and decoding.\n4. What is an LLM server, and what features they implement to optimise GPU memory usage and reduce latency.\n5. How batching affects your inference metrics, like time-to-first-token.\n\nThe talk will cover:\n\n**Did you pay attention?** (4 min). A short review of the attention mechanism and how to count parameters in a transformer-based model.\n\n**Get to know your params** (8 min). The math-y section of the talk, explaining how to translate parameter counts into memory and compute requirements.\n\n**Prefill and Decoding** (8 min) Explains that inference happens in two steps (prefill and decoding) and how KV-cache exploits this to make decoding faster. Common metrics to measure inference performance, like time-to-first-token and token-per-second.\n\n**Context and batch size** (5 min) Adds to the picture the sequence length, as well as the number of requests to process in parallel. 
Explains how LLM servers, like vLLM, use techniques like Paged Attention to optimise GPU usage\n\n**Conclusion** (5 min) Wrap up, Q&A.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LCEK33", "name": "Luca Baggi", "avatar": "https://cfp.pydata.org/media/avatars/LCEK33_5eAzatc.webp", "biography": "ML Engineer and open source maintainer", "public_name": "Luca Baggi", "guid": "67583db7-da6f-5b51-844f-9cf951b9a0a6", "url": "https://cfp.pydata.org/london2025/speaker/LCEK33/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/DDJWLB/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/DDJWLB/", "attachments": []}], "Library": [{"guid": "954b078c-b6db-5d03-a1fb-b852b33e908e", "code": "EGWMMC", "id": 77438, "logo": null, "date": "2025-06-07T10:20:00+01:00", "start": "10:20", "duration": "02:15", "room": "Library", "slug": "london2025-77438-pymc-code-sprint", "url": "https://cfp.pydata.org/london2025/talk/EGWMMC/", "title": "PyMC Code Sprint", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Join the PyMC development team for a fun and engaging hackathon!", "description": "Whether you're a seasoned Bayesian or completely new to probabilistic programming, this is your chance to contribute: write code, squash bugs, improve documentation, and develop practical examples. You'll get hands-on guidance from PyMC core contributors while making real contributions to one of the leading Bayesian inference libraries in Python. 
No prior experience required\u2014just bring your laptop and enthusiasm to learn and collaborate!", "recording_license": "", "do_not_record": false, "persons": [{"code": "MZZ8YC", "name": "Chris Fonnesbeck", "avatar": "https://cfp.pydata.org/media/avatars/MZZ8YC_Toi8z8j.webp", "biography": "Chris is a Principal Quantitative Analyst at PyMC Labs and an Adjunct Associate Professor at the Vanderbilt University Medical Center, with 20 years of experience as a data scientist in academia, industry, and government. He is interested in computational statistics, machine learning, Bayesian methods, and applied decision analysis. He hails from Vancouver, Canada and received his Ph.D. from the University of Georgia.", "public_name": "Chris Fonnesbeck", "guid": "1820c9f2-1b76-5976-b9bd-1d66269268af", "url": "https://cfp.pydata.org/london2025/speaker/MZZ8YC/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/EGWMMC/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/EGWMMC/", "attachments": []}, {"guid": "01d0dc12-c2af-5ce9-a126-cb33d8b0c8c1", "code": "MRC78H", "id": 77388, "logo": "https://cfp.pydata.org/media/london2025/submissions/MRC78H/rebel-python_5uEOEpt_W2m5SZB.jpg", "date": "2025-06-07T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Library", "slug": "london2025-77388-python-engineering-excellence-birds-of-a-feather", "url": "https://cfp.pydata.org/london2025/talk/MRC78H/", "title": "Python Engineering Excellence Birds of a Feather", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "A round table discussion on how to excel at Python engineering and architecting systems using Python, what kind of sessions and activities would best help support Python programmers be more effective at Python engineering, and how to achieve Python engineering excellence generally.", "description": "The session would consist of a short intro on what it means to achieve Python engineering excellence.  
Followed by going round everyone at the session asking about where they feel they are in terms of their Python engineering skills, where they want to improve, and what kind of activities would best support that improvement", "recording_license": "", "do_not_record": false, "persons": [{"code": "MZ3Y7X", "name": "Sam Joseph", "avatar": "https://cfp.pydata.org/media/avatars/MZ3Y7X_HfJrXt3.webp", "biography": "I am currently the lead AI developer at Qualis Flow, a company that is using the latest AI tech to help decarbonise the construction industry. Previously I was the CTO of NeuroGrid Ltd., a software consultancy firm providing data science and software engineering services. Before that I was a CoFounder of AgileVentures, where as the CTO we supported multiple open source international charity projects. Further back I was Head of Education and Engineering at the Makers Academy bootcamp, following many years as Associate Professor in Computer Science at Hawaii Pacific University, where I taught courses on AI, mobile, games and software engineering.  It all started with a Ph.D. 
in Machine Learning from the University of Edinburgh.", "public_name": "Sam Joseph", "guid": "5365fa19-af20-5ecf-bd7f-e6d4ea783f7c", "url": "https://cfp.pydata.org/london2025/speaker/MZ3Y7X/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/MRC78H/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/MRC78H/", "attachments": []}], "Elizabeth Board Room": [{"guid": "83a6a205-3b0b-58a6-b94d-b5e6e62e3b0b", "code": "VKBKTY", "id": 77597, "logo": null, "date": "2025-06-07T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Elizabeth Board Room", "slug": "london2025-77597-feminist-ai-lounge", "url": "https://cfp.pydata.org/london2025/talk/VKBKTY/", "title": "Feminist AI Lounge", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Join our chill space, unwind, chat about [Feminist AI](https://feministai.party) and contribute to the PyData London DIY collage zine.", "description": "", "recording_license": "", "do_not_record": false, "persons": [{"code": "FZKG9N", "name": "Ines Montani", "avatar": "https://cfp.pydata.org/media/avatars/FZKG9N_7il65fA.webp", "biography": "Ines Montani is a developer specializing in tools for AI and NLP technology. 
She\u2019s the co-founder and CEO of Explosion and a core developer of spaCy, a popular open-source library for Natural Language Processing in Python, and Prodigy, a modern annotation tool for creating training data for machine learning models.", "public_name": "Ines Montani", "guid": "974b319d-258e-5977-a654-ce3a696a831d", "url": "https://cfp.pydata.org/london2025/speaker/FZKG9N/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/VKBKTY/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/VKBKTY/", "attachments": []}]}}, {"index": 3, "date": "2025-06-08", "day_start": "2025-06-08T04:00:00+01:00", "day_end": "2025-06-09T03:59:00+01:00", "rooms": {"Grand Hall": [{"guid": "2f23306c-8b44-5948-ad2a-8ffc98e07294", "code": "ZDTG3L", "id": 77169, "logo": null, "date": "2025-06-08T10:15:00+01:00", "start": "10:15", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77169-ai-for-everyone-building-inclusive-machine-learning-models", "url": "https://cfp.pydata.org/london2025/talk/ZDTG3L/", "title": "AI for Everyone - Building Inclusive Machine Learning Models", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Artificial Intelligence (AI) and Machine Learning (ML) are transforming industries such as healthcare, finance, education, and entertainment. However, these advancements are not benefiting everyone equally. Biases in datasets, algorithms, and design processes often lead to AI systems that unintentionally exclude or misrepresent underrepresented communities, reinforcing societal inequalities.\n\nThis talk, \"AI for Everyone: Building Inclusive Machine Learning Models,\" explores the critical importance of developing AI systems that are ethical, fair, and accessible to all. We will examine real-world examples of AI bias, discuss techniques for identifying and mitigating bias in data and models, and explore frameworks for responsible AI development. 
Attendees will leave with actionable insights to design AI solutions that promote fairness, inclusivity, and social impact.", "description": "Artificial Intelligence (AI) and Machine Learning (ML) have become central to decision-making processes across industries, from automating hiring decisions to medical diagnostics and financial services. While AI has the potential to drive efficiency and innovation, its benefits are not always equitably distributed. Biases embedded in training datasets, model design, and algorithmic decision-making can lead to discriminatory outcomes that disproportionately affect marginalized communities.\n\nThis talk, \"AI for Everyone: Building Inclusive Machine Learning Models,\" will explore the impact of AI bias and discuss strategies for creating more inclusive AI systems. We will analyze real-world examples where AI has failed underrepresented groups, from facial recognition technologies that misidentify people of color to automated systems that reinforce gender and socioeconomic disparities.\n\nKey topics covered in this session include:\n\nBias in AI \u2013 Understanding how biases arise in datasets and machine learning models.\nDataset Design and Fair Representation \u2013 Best practices for creating diverse and representative training data.\nAlgorithmic Fairness \u2013 Techniques for detecting and mitigating bias in machine learning models.\nEthical AI Development \u2013 Principles and frameworks to ensure accountability, transparency, and inclusivity in AI.\nThe Societal Impact of Inclusive AI \u2013 How equitable AI can drive positive social change and empower underrepresented communities.\nThis session is designed for developers, data scientists, AI practitioners, and decision-makers who want to ensure fairness and inclusivity in their AI projects. 
Attendees will leave with a clear understanding of AI bias challenges and practical steps to design ethical, inclusive AI systems that benefit everyone.", "recording_license": "", "do_not_record": false, "persons": [{"code": "DB7J93", "name": "Elizabeth Osanyinro", "avatar": "https://cfp.pydata.org/media/avatars/DB7J93_3qClf4L.webp", "biography": "Elizabeth Osanyinro is a data analyst passionate about AI ethics, fairness, and inclusive technology. Currently a Business Analyst at Carbonnote AI, Elizabeth is completing an MSc in Applied Artificial Intelligence and Data Analytics at the University of Bradford. With experience as a digital marketing and business analyst, she has worked on diverse projects, including retail analytics, credit card fraud detection, and blockchain-based digital verification.\n\nElizabeth is proficient in tools such as Microsoft Excel, SAS, Python, R, Power BI, and Looker. As the founder of PyData Bradford, she actively fosters community-driven learning in AI and data science", "public_name": "Elizabeth Osanyinro", "guid": "053b6825-7767-5085-913c-d2d07b26c184", "url": "https://cfp.pydata.org/london2025/speaker/DB7J93/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/ZDTG3L/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/ZDTG3L/", "attachments": []}, {"guid": "2f9feb78-5c76-5797-a5cf-d6f4ff7e2cde", "code": "WJXMZP", "id": 77399, "logo": null, "date": "2025-06-08T11:00:00+01:00", "start": "11:00", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77399-reproducibility-in-embedding-benchmarks", "url": "https://cfp.pydata.org/london2025/talk/WJXMZP/", "title": "Reproducibility in Embedding Benchmarks", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Reproducibility in embedding benchmarks is no small feat. Prompt variability, growing computational demands, and evolving tasks make fair comparisons a challenge. 
The need for robust benchmarking has never been greater. In this talk, we\u2019ll explore the quirks and complexities of benchmarking embedding models, such as prompt sensitivity, scaling issues, and emergent behaviors. \n\nWe\u2019ll hear straight from the Massive Text Embedding Benchmark (MTEB) maintainers and show how MTEB (and its extensions like MMTEB and MIEB) simplifies reproducibility, making it easier for researchers and industry practitioners to measure progress, choose the right models, and push the boundaries of embedding performance.", "description": "Reproducibility in embedding benchmarks is no small feat. Prompt variability, growing computational demands, and evolving tasks make fair comparisons a challenge. The need for robust benchmarking has never been greater. \n\nThe Massive Text Embedding Benchmark (MTEB) addresses these challenges with a standardized, open-source framework for evaluating text embedding models. Covering diverse tasks like clustering, retrieval, and classification, MTEB ensures consistent and reproducible results. Extensions like MMTEB (multilingual) and MIEB (image) further expand its capabilities.\n\nIn this talk, we\u2019ll explore the quirks and complexities of benchmarking embedding models, such as prompt sensitivity, scaling issues, and emergent behaviors. We\u2019ll show how MTEB simplifies reproducibility, making it easier for researchers and industry practitioners to measure progress, choose the right models, and push the boundaries of embedding performance.", "recording_license": "", "do_not_record": false, "persons": [{"code": "ABV3RR", "name": "Isaac Chung", "avatar": "https://cfp.pydata.org/media/avatars/ABV3RR_A2GrQkY.webp", "biography": "My focus is on making AI systems usable, scalable, and maintainable. I'm currently a Staff Data Scientist at Zendesk, working on LLM-powered features that see millions of conversations a day. 
\n\nPreviously at Clarifai, I helped build and maintain multimodal retrieval systems in production. My background is in Aerospace Engineering and Machine Learning and I hold undergraduate (B.A.Sc in EngSci) and graduate (M.A.Sc) degrees from the University of Toronto.\n\nIn my spare time, I am a maintainer for MTEB, I like to see the world, and do a bit of running and hiking.", "public_name": "Isaac Chung", "guid": "a63801ff-ac16-5f65-a679-a233b9149e35", "url": "https://cfp.pydata.org/london2025/speaker/ABV3RR/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/WJXMZP/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/WJXMZP/", "attachments": []}, {"guid": "317f63bf-57f2-5bca-8f63-8d3ccd57b24f", "code": "9GTM3Q", "id": 77514, "logo": null, "date": "2025-06-08T11:45:00+01:00", "start": "11:45", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77514-cuda-in-python-a-new-era-for-gpu-acceleration", "url": "https://cfp.pydata.org/london2025/talk/9GTM3Q/", "title": "CUDA in Python: A New Era for GPU Acceleration", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "We discuss bringing Python natively to the CUDA ecosystem. From low level bindings to domain specific applications, CUDA is supporting Python standards and ecosystem. New libraries include nvmath-python for managing optimized mathematics libraries, cccl-python for cooperative threading and device parallelism, cuda-core for managing the complete CUDA toolstack from Python with no need for C++, and finally numba-cuda for generating device side kernels with integration of C++ device libraries and LTO IR.", "description": "CUDA has been accessible to Python developers for over a decade, but often through third-party abstractions that lag behind the latest CUDA releases. 
However, that\u2019s changing\u2014over the next year, NVIDIA is making Python a first-class CUDA language.\n\nIn this talk, we\u2019ll explore how Python programmers can leverage the CUDA platform today and how native Python support is evolving across the entire CUDA stack.\n\nWe begin with an overview of the CUDA programming model and how to manage accelerator devices as a core part of a Python application. Then, we dive into three practical examples:\n\nImage Processing for Machine Learning Pipelines \u2013 Launching, executing, and streaming transformations directly from Python.\nNeural Network Primitives \u2013 Implementing operations like softmax with blockwise parallelism.\nHigh-Performance Deep Learning \u2013 Integrating with optimized libraries that leverage low-level, highly tuned CUDA kernels.\nTo showcase the power of these Python interfaces, we conclude with a hands-on demonstration: implementing GPT-2 (inspired by llm.c) entirely in Python\u2014achieving performance nearly identical to its C counterpart.\n\nJoin us to discover the joy of CUDA from Python, and unlock new possibilities in GPU acceleration with a familiar, high-level language!", "recording_license": "", "do_not_record": false, "persons": [{"code": "WYWH8R", "name": "Andy Terrel", "avatar": "https://cfp.pydata.org/media/avatars/WYWH8R_dYVYj9v.webp", "biography": "I lead CUDA Python Product Management, working to make CUDA a Python native.\n\nI received my Ph.D. from the University of Chicago in 2010, where I built domain-specific languages to generate high-performance code for physics simulations with the PETSc and FEniCS projects. After spending a brief time as a research professor at the University of Texas and Texas Advanced Computing Center, I have been a serial startup executive, including a founding team member of Anaconda.\n\nI am a leader in the Python open data science community (PyData). 
A contributor to Python's scientific computing stack since 2006, I am most notably a co-creator of the popular Dask distributed computing framework, the Conda package manager, and the SymPy symbolic computing library. I was a founder of the NumFOCUS foundation. At NumFOCUS, I served as the president and director, leading the development of programs supporting open-source codes such as Pandas, NumPy, and Jupyter.", "public_name": "Andy Terrel", "guid": "2cb2a1c0-f3fe-5240-9bd0-a53129ff8e9b", "url": "https://cfp.pydata.org/london2025/speaker/WYWH8R/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/9GTM3Q/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/9GTM3Q/", "attachments": []}, {"guid": "cf41d9a5-1634-525e-8292-46e13ddadcb7", "code": "8NMPDW", "id": 77037, "logo": null, "date": "2025-06-08T13:30:00+01:00", "start": "13:30", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77037-keynote-innovation-is-dead", "url": "https://cfp.pydata.org/london2025/talk/8NMPDW/", "title": "Keynote- Innovation is Dead", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Join us for an exciting Keynote with Tony Mears!", "description": "Sunday 13:30 in the Grand Hall.", "recording_license": "", "do_not_record": false, "persons": [{"code": "QTSEYK", "name": "Tony Mears", "avatar": "https://cfp.pydata.org/media/avatars/QTSEYK_c01ESdx.webp", "biography": "Tony Mears is a director in the UK National Health Service (NHS) specialising in strategy and innovation.\nHe is the author of \u2018Innovation is Dead: dispatches from the front\u2019 which seeks to deploy cross sector innovation for public sector good. 
He has previously been deputy director of innovation for a new hospital programme \u2013 and led the technology strategy, EU Exit negotiation, and launch policy at the UK Space Agency, including as a delegate to the UN and European Space Agency.\nHe has sat on the advisory board of wearable company WHOOP, has an MA in Political Communications, a BA in Archaeology, and resides in the South of England with his wife and two children.", "public_name": "Tony Mears", "guid": "a4f4e667-2f9a-5387-bdd2-364cc62eb9ba", "url": "https://cfp.pydata.org/london2025/speaker/QTSEYK/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/8NMPDW/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/8NMPDW/", "attachments": []}, {"guid": "93a923fc-f931-5f8f-888c-fdadc536d698", "code": "CPNZ9G", "id": 77418, "logo": null, "date": "2025-06-08T14:45:00+01:00", "start": "14:45", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77418-diving-into-transformer-model-internals", "url": "https://cfp.pydata.org/london2025/talk/CPNZ9G/", "title": "Diving into Transformer Model Internals", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "While everybody and their dog is building applications on generative AI, the inner workings of transformers - the model architecture behind genAI age - is a mystery for most people. In this talk, I'll walk through how transformers are implemented, using real-life Python code from the HuggingFace transformers library.", "description": "The inner workings of transformers is a huge topic, and one that constantly evolves, so it's impossible to cover absolutely everything in 30 minutes. I'd like the audience to take away from this talk the \"minimal viable knowledge\" that helps them to understand the most salient details, and to build an intuition around what goes on under the hood.\n\nWe'll cover:\n\n1. An overview of how transformers process text using an example\n2. 
Transformers as a concept vs specific implementations, particularly HuggingFace's transformers library\n3. A code tour of the HuggingFace transformers library\n\nThis talk is primarily aimed at programmers and software engineers, who want to build a coder's intuition for how this stuff really works, as well as data scientists who want to better understand how transformers are implemented internally.", "recording_license": "", "do_not_record": false, "persons": [{"code": "K9XLAD", "name": "Matt Squire", "avatar": null, "biography": "Programmer, and CTO at Fuzzy Labs. I enjoy AI, MLOps, bio-inspired computing and functional programming.", "public_name": "Matt Squire", "guid": "152eee1a-ff5e-5189-a604-8504fc39655b", "url": "https://cfp.pydata.org/london2025/speaker/K9XLAD/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/CPNZ9G/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/CPNZ9G/", "attachments": []}, {"guid": "35179336-fa21-5814-bb49-49df973504b2", "code": "HN7ZRP", "id": 77538, "logo": null, "date": "2025-06-08T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77538-you-came-to-a-python-conference-now-go-do-a-pr-review", "url": "https://cfp.pydata.org/london2025/talk/HN7ZRP/", "title": "You Came to a Python Conference. Now, Go Do a PR Review!", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "If you or your organization are spending time and resources attending a Python conference, you will want to ensure your team gets something immediately actionable and helpful out of it. As coders, we often think about writing code as the only way to contribute. However, pull request reviews are an often overlooked, but highly actionable way to have an impact.\n\nGiving good PR reviews is an art, with two equally important parts: the technical side and the communication side. 
While the technical side ensures the quality, maintainability, and efficiency of the Python code, the communication around the PR determines whether the feedback can be understood and acted upon. However, we have all seen code reviews that have been ignored or executed poorly due to poor communication.\n\nThis talk addresses both facets of PR reviews by introducing the archetypes of bad code reviewers: \n  1) The \u201cLooks Good to Me\u201d Reviewer: This peer reviewer provides little to no actionable feedback.\n  2) The \u201cTechnical Nitpicker\u201d: This peer reviewer focuses on small Python-specific issues, but fails to \n      communicate constructively.\n  3) The \u201cNit\u201d Commenter: This peer reviewer prefaces every comment with \u201cnit,\u201d while offering unclear, yet technically valid suggestions \n\nUsing these archetypes, we will explore Python-specific technical topics (such as pass by reference vs. pass by value), while delving into how to communicate and deliver feedback in a clear and actionable manner. Using real-world examples, attendees will learn how to:\n     a) Identify and address technical issues in Python PRs\n     b) Communicate feedback effectively\n     c) Balance technical rigor with constructive feedback\n     d) Communicate their peer review comments clearly", "description": "# 1. Introduction (5 minutes)\n- a. How pull request reviews are a great way to use your Python skills to make an impact  \n- b. Overview of what makes a good PR review: technical Python knowledge and clear, helpful communication  \n\n# 2. Archetypes of Bad Reviewers (5 minutes)\n- a. The \u201cLooks Good to Me\u201d reviewer \u2013 No meaningful feedback  \n- b. The \u201cTechnical Nitpicker\u201d \u2013 Overly technical but unconstructive communication  \n- c. The \u201cNit\u201d Commenter \u2013 Poor communication despite valid points  \n\n# 3. Technical Python Knowledge for PR Reviews (20 minutes)\n- a. Pass by reference vs. pass by value  \n- b. 
Immutable vs. mutable types  \n- c. Common Python-specific pitfalls  \n  - i. Ex: Avoiding default mutable arguments  \n- d. Identifying inefficiencies  \n  - i. Loops vs. list comprehensions  \n  - ii. When to use generators  \n- e. Using underutilized tools  \n  - i. `pathlib`  \n  - ii. `defaultdict`  \n\n# 4. Communication Related to PR Reviews (7 minutes)\n- a. Principles of constructive feedback  \n  - i. Clarity  \n  - ii. Respect  \n  - iii. Specificity  \n  - iv. Why  \n- b. Techniques for making technical feedback actionable  \n- c. Encouraging dialogue in PRs  \n\n# 5. Conclusion (3 minutes)\n- a. Recap key takeaways  \n- b. Balance technical rigor with clear, helpful communication", "recording_license": "", "do_not_record": false, "persons": [{"code": "HQ3RL9", "name": "Samiul Huque", "avatar": "https://cfp.pydata.org/media/avatars/HQ3RL9_CG5BC6e.webp", "biography": "Samiul Huque is a senior software engineer at Bloomberg, where he works on the company\u2019s Instant Bloomberg (IB for short) chat tool. He works across the stack, building full-stack products where he primarily uses a combination of JavaScript/TypeScript and Python. Outside of work, Samiul plays tennis, competes in MMA, and eats his steak medium rare. 
Samiul earned his bachelor\u2019s degree in economics and mathematics from the University of Richmond.", "public_name": "Samiul Huque", "guid": "f65f2671-95f1-5c0c-ab45-0d09d8aaf4a7", "url": "https://cfp.pydata.org/london2025/speaker/HQ3RL9/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/HN7ZRP/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/HN7ZRP/", "attachments": []}, {"guid": "ab28be0b-2746-5c27-aeae-ec708ef73f6b", "code": "LSXNTQ", "id": 77676, "logo": null, "date": "2025-06-08T16:15:00+01:00", "start": "16:15", "duration": "00:45", "room": "Grand Hall", "slug": "london2025-77676-scaling-ai-workloads-with-ray-airflow", "url": "https://cfp.pydata.org/london2025/talk/LSXNTQ/", "title": "Scaling AI workloads with Ray & Airflow", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "[Ray](https://www.ray.io/) is an open-source framework for scaling Python applications, particularly machine learning and AI workloads. It provides the layer for parallel processing and distributed computing. Many large language models (LLMs), including OpenAI's GPT models, are trained using Ray.\n\nOn the other hand, [Apache Airflow](https://github.com/apache/airflow) is a consolidated data orchestration framework downloaded more than 20 million times monthly.\n\nThis talk presents the [Airflow Ray provider package](https://github.com/astronomer/astro-provider-ray) that allows users to interact with Ray from an Airflow workflow. In this talk, I'll show how to use the package to create Ray clusters and how Airflow can trigger Ray pipelines in those clusters.", "description": "This talk will discuss the benefits of using the [Airflow Ray provider package](https://github.com/astronomer/astro-provider-ray)  to orchestrate Ray pipelines using Apache Airflow. 
They include:\n- Integration: Incorporate Ray jobs into Airflow DAGs for unified workflow management.\n- Distributed computing: Use Ray's distributed capabilities within Airflow pipelines for scalable ETL and LLM fine-tuning.\n- Monitoring: Track Ray job progress through Airflow's user interface.\n- Dependency management: Define and manage dependencies between Ray jobs and other tasks in DAGs.\n- Resource allocation: Run Ray jobs alongside other task types within a single pipeline.", "recording_license": "", "do_not_record": false, "persons": [{"code": "9EQYZP", "name": "Tatiana Al-Chueyr", "avatar": "https://cfp.pydata.org/media/avatars/9EQYZP_vCXkKve.webp", "biography": "Tatiana is a Staff Software Engineer at Astronomer and builds open-source tools to improve Apache Airflow.\n\nSince graduating in Computer Engineering at Unicamp, Brazil, she has worked on multiple projects and contributed to various open-source projects. Before working at Astronomer, she worked for the Brazilian Ministry of Science and Technology, Globo, Education First, and BBC.", "public_name": "Tatiana Al-Chueyr", "guid": "f6bc197a-147c-5c4c-876a-c387fd746ac7", "url": "https://cfp.pydata.org/london2025/speaker/9EQYZP/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/LSXNTQ/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/LSXNTQ/", "attachments": []}], "Doddington Forum": [{"guid": "d883700a-7932-50e9-8cb6-cd7052437e50", "code": "A87LEE", "id": 77094, "logo": null, "date": "2025-06-08T10:15:00+01:00", "start": "10:15", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77094-from-trees-to-transformers-our-journey-towards-deep-learning-for-ranking", "url": "https://cfp.pydata.org/london2025/talk/A87LEE/", "title": "From Trees to Transformers: Our Journey Towards Deep Learning for Ranking", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "GetYourGuide, a global marketplace for travel experiences, reached 
diminishing returns with its XGBoost-based ranking system. We switched to a Deep Learning pipeline in just nine months, maintaining high throughput and low latency. We iterated on over 50 offline models and conducted more than 10 live A/B tests, ultimately deploying a PyTorch transformer that yielded significant gains. In this talk, we will share our phased approach\u2014from a simple baseline to a high-impact launch\u2014and discuss the key operational and modeling challenges we faced. Learn how to transition from tree-based methods to neural networks and unlock new possibilities for real-time ranking.", "description": "GetYourGuide is a global online marketplace that helps travelers discover and book the best experiences. One of our core challenges is ensuring users always see the most relevant activities first\u2014a task historically powered by an XGBoost-based ranking system. However, as we continued refining our tree-based models, returns on incremental improvements began to plateau. To spark our next step change in performance, we decided to adopt Deep Learning.\n\nIn this talk, we will share how, in just nine months, we migrated our ranking pipeline to a Deep Learning architecture while maintaining tight latency and high-throughput requirements. We will walk through our phased approach, starting with a minimal viable model to confirm our production setup and gradually increasing its complexity. Along the way, we tested over 50 iterations offline and ran more than 10 live A/B tests to validate the impact on our customers. Ultimately, we rolled out a PyTorch transformer-based model with significant business impact. We will also discuss the main challenges we faced on the operational and modeling sides, how we overcame them, and the lessons we learned.\n\nYou will leave with practical strategies for transitioning from traditional tree-based models to neural networks in production. 
Join us to learn how to advance your machine-learning capabilities and unlock new dimensions of relevance and personalization for real-time ranking.", "recording_license": "", "do_not_record": false, "persons": [{"code": "EZMJWT", "name": "Theodore Meynard", "avatar": "https://cfp.pydata.org/media/avatars/EZMJWT_io5jCH5.webp", "biography": "Theodore Meynard is a data science manager at GetYourGuide. He leads the evolution of their ranking algorithm, helping customers to find the best activities to book and locations to explore. Beyond work, he is one of the co-organizers of the PyData Berlin meetup and the conference. \nWhen he is not programming, he loves riding his bike, looking for the best bakery-patisserie in town.", "public_name": "Theodore Meynard", "guid": "e8cd8307-aa2c-58ef-bf5a-fb20a673814b", "url": "https://cfp.pydata.org/london2025/speaker/EZMJWT/"}, {"code": "LPMP88", "name": "Mihail Douhaniaris", "avatar": "https://cfp.pydata.org/media/avatars/LPMP88_e9HpKil.webp", "biography": "Mihail Douhaniaris is a Senior Data Scientist at GetYourGuide, where he specializes in improving the marketplace ranking algorithms to improve search relevance. His work helps travelers find experiences that match their preferences more effectively. 
Beyond his role, Mihail is deeply interested in responsible AI, ML observability, and the challenges of deploying machine learning at scale.", "public_name": "Mihail Douhaniaris", "guid": "2f5bb1ea-cdee-5e35-8032-5bba5fa126d8", "url": "https://cfp.pydata.org/london2025/speaker/LPMP88/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/A87LEE/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/A87LEE/", "attachments": []}, {"guid": "a68369dc-a262-5035-8ef1-ddb64305428d", "code": "A8PQEU", "id": 77462, "logo": "https://cfp.pydata.org/media/london2025/submissions/A8PQEU/IMG_7207_eWkghm9_pc8F0Gc.JPG", "date": "2025-06-08T11:00:00+01:00", "start": "11:00", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77462-making-llms-reliable-a-practical-framework-for-production", "url": "https://cfp.pydata.org/london2025/talk/A8PQEU/", "title": "Making LLMs reliable: A practical framework for production", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "LLM outputs are non-deterministic, making it difficult to ensure reliability in production, especially in high-risk applications. In this talk, we\u2019ll walk through a structured approach to making LLMs production-ready. We\u2019ll cover setting up tests during experimentation, implementing real-time guardrails before responses reach users, and monitoring live performance for critical issues. Finally, we\u2019ll discuss post-deployment log analysis to drive continuous improvements and build trust with stakeholders.", "description": "LLMs are transforming how we build applications, but their non-deterministic outputs and potential for hallucination create barriers for adoption in high-risk industries. 
In this talk, we will discuss a systematic approach to LLM application development that covers the pre-production and experimentation phase, real-time guardrails for output validation, and post-deployment analysis for identifying areas for improvement.\n\nWe\u2019ll talk about:\n- Creating comprehensive test sets with edge case coverage\n- Unit tests for LLMs and establishing baseline metrics for reliability assessment\n- Structured experimentation approaches for prompt optimization\n- Real-time guardrails for output validation\n- Live monitoring and alert systems\n- Log analysis for pattern identification\n\nWe'll demonstrate practical implementations using Python libraries and monitoring tools, with real-world examples from production systems. The session will provide actionable insights for software developers, AI engineers and product managers looking to deploy LLM applications responsibly and gain stakeholder trust.\n\nAttendees will leave with:\n- A structured framework for LLM application development\n- Practical code examples for implementing guardrails\n- Strategies for continuous monitoring and improvement\n\nThis talk is suitable for intermediate practitioners who work with LLMs and need to ensure their reliable deployment in production environments.", "recording_license": "", "do_not_record": false, "persons": [{"code": "KSTNT9", "name": "Lena Shakurova", "avatar": "https://cfp.pydata.org/media/avatars/KSTNT9_W78RBS3.webp", "biography": "Lena Shakurova is the founder of ParsLabs (https://parslabs.org), a Conversational AI agency, and Chatbotly (https://chatbotly.co), a no-code platform for building AI assistants trained on custom data.\n\nAt ParsLabs, she leads a team blending AI, user research and conversation science to design and develop high quality AI Conversations that sound human. 
She has a background in NLP and Artificial Intelligence, with 7+ years of experience and 80+ successful projects building production-ready chatbots and voice assistants.\n\nLena focuses on ethical, user-first AI, leveraging her expertise in Linguistics & AI to create responsible, high-quality AI solutions. She shares insights on AI innovation and human-centered design through her blog (https://shakurova.io/blog) and LinkedIn (https://www.linkedin.com/in/lena-shakurova/).", "public_name": "Lena Shakurova", "guid": "cb65ad72-de81-5be9-816a-a81c562e9a02", "url": "https://cfp.pydata.org/london2025/speaker/KSTNT9/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/A8PQEU/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/A8PQEU/", "attachments": []}, {"guid": "ecd61762-7145-584b-994d-a8c0ff206ba2", "code": "C7KGVS", "id": 77840, "logo": null, "date": "2025-06-08T11:45:00+01:00", "start": "11:45", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77840-analysing-smart-meter-data-to-uncover-energy-consumption-patterns", "url": "https://cfp.pydata.org/london2025/talk/C7KGVS/", "title": "Analysing smart meter data to uncover energy consumption patterns", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Smart meters have the potential to not only provide information to individual householders about their energy consumption, but to identify patterns of usage across the entire energy system. 
At Nesta, we have been analysing smart meter data to uncover information about energy consumption habits, and how household appliances, physical property characteristics and demographic factors influence energy usage - as this can help develop energy-saving initiatives.\nIn this talk we will present the data science techniques we used, such as clustering, present our results as well as discuss how we translate them to a non-data science audience, and share learnings of conducting data science work in a secure data lab to allow for analysis of sensitive and confidential data.", "description": "This talk is for those interested in learning about:\n- applied data science in a non-profit organisation & in the field of sustainability/ home decarbonisation;\n- the data science techniques we used to uncover patterns of energy usage, such as clustering;\n- conducting data science work in a secure lab environment/ how to analyse sensitive and confidential data;\n- translating insights to a non-data science audience;\n- working with multidisciplinary teams, including designers and domain experts.", "recording_license": "", "do_not_record": false, "persons": [{"code": "7HMSUV", "name": "Sofia Pinto", "avatar": "https://cfp.pydata.org/media/avatars/7HMSUV_q41mfwJ.webp", "biography": "Sofia is a data scientist at Nesta, working with the sustainable future mission team on decarbonising UK homes. During her time at Nesta, Sofia worked with energy performance certificates, social media and smart meter data to: estimate the cost of low carbon heating technologies, identify issues faced by homeowners in their low carbon heating path, understand how people consume energy in their homes.\n\nPrior to joining Nesta, Sofia worked as a data scientist at Imperial College London, assessing the accuracy of crowdsourced data for road traffic collision and injury surveillance. 
Before this she worked as a research fellow at the Social Physics and Complexity research group, LIP Portugal, on health related projects such as identifying antibiotic over-prescription and factors influencing it.\n\nSofia holds a Bachelor\u2019s degree in Applied Mathematics and Master\u2019s degree in Data Science and Advanced Analytics.", "public_name": "Sofia Pinto", "guid": "f7372c0f-b30c-5228-aa84-fe4cac99bfe5", "url": "https://cfp.pydata.org/london2025/speaker/7HMSUV/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/C7KGVS/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/C7KGVS/", "attachments": []}, {"guid": "cea6cfa7-a302-5e18-9643-f44188e07365", "code": "H3H3BL", "id": 77374, "logo": null, "date": "2025-06-08T14:45:00+01:00", "start": "14:45", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77374-agentic-cyber-defense-with-external-threat-intelligence", "url": "https://cfp.pydata.org/london2025/talk/H3H3BL/", "title": "Agentic Cyber Defense with External Threat Intelligence", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "This talk will detail how to integrate external threat intelligence data into an autonomous agentic AI system for proactive cybersecurity. Using real world datasets\u2014including open-source threat feeds, security logs, or OSINT\u2014you will learn how to build a data ingestion pipeline, train models with Python, and deploy agents that autonomously detect and mitigate cyber threats. This case study will provide practical insights into data preprocessing, feature engineering, and the challenges of adversarial conditions.", "description": "In an era where cyber threats are growing both in complexity and frequency, harnessing external threat intelligence can provide a decisive edge in cybersecurity. 
This session offers a deep dive into developing autonomous agentic AI systems that leverage publicly available threat data to drive proactive defense mechanisms.\n\nKey Focus Areas:\n\nIntegrating External Data: Learn strategies to ingest, clean, and harmonize diverse external datasets\u2014such as open-source threat feeds, OSINT, and incident logs\u2014with your internal security data, creating a comprehensive situational awareness.\n\nAgentic AI in Cyber Defense:\nUnderstand the core principles behind agentic AI and its application in autonomous cybersecurity systems. Discover how AI agents can continuously monitor network behavior, learn from evolving threats, and execute proactive countermeasures.\n\nAddressing Security Challenges:\nDelve into the challenges of deploying autonomous systems in adversarial environments. The talk will cover best practices for mitigating vulnerabilities, including strategies to combat adversarial attacks and data poisoning.", "recording_license": "", "do_not_record": false, "persons": [{"code": "YEK3EY", "name": "Jyoti Yadav", "avatar": "https://cfp.pydata.org/media/avatars/YEK3EY_ozjZa4u.webp", "biography": "Jyoti is an Applied Cyber Security Data Scientist at Microsoft, UK. Jyoti has a total of 8 years of experience in top notch technologies like blockchain, cybersecurity and finance industry. 
Jyoti has primarily worked on the LLM, fine-tuning and agent AI systems.", "public_name": "Jyoti Yadav", "guid": "13fea77a-7252-5c6e-941d-66ce7b2a9365", "url": "https://cfp.pydata.org/london2025/speaker/YEK3EY/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/H3H3BL/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/H3H3BL/", "attachments": []}, {"guid": "5db6418f-bb8a-503c-a54e-b21f70134f01", "code": "FRRUL8", "id": 77099, "logo": null, "date": "2025-06-08T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77099-is-coding-assistant-as-good-as-we-thought-in-coding", "url": "https://cfp.pydata.org/london2025/talk/FRRUL8/", "title": "Is coding assistant as good as we thought in coding?", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Nowadays coding assistants are everywhere, many IDEs are offering them as plugins, and are becoming more and more powerful. But it prompts us questions, is coding assistant as good as we want it to be? What can and can't these AI agents do? Will AI take my job?", "description": "In this talk, the speaker will explain the current state of AI coding assistants, what is in the market, and what they promise. The speaker will also, with some real experience from developers who have used coding assistants, explore the potential and limitations of the assistants. 
From there, we will also look into the future, predicting the landscape of the software engineering industry and as a developer how we can take advantage of the coding assistants instead of getting our jobs taken by them.\n\n## Topics covered\n\n- Introduction to various coding assistants\n- The pros and cons of using coding assistants\n- How will coding assistants affect the industry\n- As developers, how shall we position ourselves in the AI landscape\n- Summary and takeaways\n\n## Goal\n\nTo explain, in as objective a way as possible, the effect of AI coding assistants in a developer's career and to be proactive in preparing what's to come.\n\n## Target Audience\n\nEveryone who codes for a living or anyone who is enthusiastic about coding. The speaker expects all levels of familiarity with AI coding assistants.", "recording_license": "", "do_not_record": false, "persons": [{"code": "8EGVC9", "name": "Cheuk Ting Ho", "avatar": "https://cfp.pydata.org/media/avatars/8EGVC9_LbezfQb.webp", "biography": "After having a career as a Data Scientist and Developer Advocate, Cheuk dedicated her work to the open-source community. Currently, she is working as an AI developer advocate for JetBrains. She has co-founded Humble Data, a beginner Python workshop that has been happening around the world. 
She has served the EuroPython Society board for two years and is now a fellow and director of the Python Software Foundation.", "public_name": "Cheuk Ting Ho", "guid": "6acb0b45-07a8-5f1c-a3fa-45ae8f0a9858", "url": "https://cfp.pydata.org/london2025/speaker/8EGVC9/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/FRRUL8/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/FRRUL8/", "attachments": []}, {"guid": "2e682693-19a6-57b0-9317-65da82ba430f", "code": "AYL3PL", "id": 77067, "logo": "https://cfp.pydata.org/media/london2025/submissions/AYL3PL/new_narwhals_small_nyx4Wr_kCUtZao.png", "date": "2025-06-08T16:15:00+01:00", "start": "16:15", "duration": "00:45", "room": "Doddington Forum", "slug": "london2025-77067-polars-duckdb-pyspark-pyarrow-pandas-cudf-how-narwhals-has-brought-them-all-together", "url": "https://cfp.pydata.org/london2025/talk/AYL3PL/", "title": "Polars, DuckDB, PySpark, PyArrow, pandas, cuDF: how Narwhals has brought them all together!", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Suppose you want to write a data science tool to do feature engineering. Your experience may go like this:\n- Expectation: you can focus on state-of-the-art techniques for feature engineering.\n- Reality: you keep having to make your codebase more complex because a new dataframe library has come out and users are demanding support for it.\n\nOr rather, it might have gone like that in the pre-Narwhals era. Because now, you can focus on solving the problems which your tool set out to do, and let Narwhals handle the subtle differences between different kinds of dataframe inputs!", "description": "Narwhals is a lightweight and extensible compatibility layer between dataframe libraries. It is already used by several open source libraries including Altair, Marimo, Plotly, Scikit-lego, Vegafusion, and more. 
You will learn how to use Narwhals to build dataframe-agnostic tools.\n\nThis is a technical talk aimed at tool-builders. You'll be expected to be familiar with Python and dataframes. We will cover:\n\n- 2-3 minutes: Motivation. Why are there so many dataframe libraries?\n- 2-3 minutes: Life before vs after Narwhals - real-world examples of how the data landscape is changing\n- 7-8 minutes: Basics of Narwhals, wrapping native objects, expressions vs Series, lazy vs eager\n- 7-8 minutes: Advanced Narwhals concepts: row order, non-elementary group-by aggregations, multi-indices, null values, backwards-compatibility promises\n- 10 minutes: What is the Narwhals community like, how can you contribute and get involved, what comes next?\n- 5-10 minutes: Engaging Q&A / awkward silence\n\nTool builders will benefit from the talk by learning how to build tools for modern dataframe libraries without sacrificing support for foundational classic libraries such as pandas.", "recording_license": "", "do_not_record": false, "persons": [{"code": "KEUJ9U", "name": "Marco Gorelli", "avatar": "https://cfp.pydata.org/media/avatars/KEUJ9U_Q9qz0Ku.webp", "biography": "Marco is the author of Narwhals, core contributor to pandas and Polars, and works at Quansight Labs as Senior Software Engineer. He also consults and trains clients professionally on Polars. 
He has also written the first Polars Plugins Tutorial and has taught Polars Plugins to clients.\n\nHe has a background in Mathematics and holds an MSc from the University of Oxford, and was one of the prize winners in the M6 Forecasting Competition (2nd place overall Q1).", "public_name": "Marco Gorelli", "guid": "2d6c7d56-9236-5478-9821-f6b849f264bd", "url": "https://cfp.pydata.org/london2025/speaker/KEUJ9U/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/AYL3PL/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/AYL3PL/", "attachments": []}], "Hardwick Hub": [{"guid": "efec3201-3bfc-56c1-845b-d3fd4540534b", "code": "TLFMW3", "id": 77168, "logo": "https://cfp.pydata.org/media/london2025/submissions/TLFMW3/headshot_UXI1Vi8_j6Hs2fu.jpg", "date": "2025-06-08T10:15:00+01:00", "start": "10:15", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77168-automating-porosity-detection-in-additive-manufacturing-with-deep-learning", "url": "https://cfp.pydata.org/london2025/talk/TLFMW3/", "title": "Automating Porosity Detection in Additive Manufacturing with Deep Learning", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Additive Manufacturing (AM) enables complex, high-performance components, but porosity defects can compromise structural integrity. Traditional porosity analysis in X-ray CT scans is manual, slow, and inconsistent. This talk introduces a deep learning-based approach using CNNs and segmentation models to automate porosity detection, enhancing accuracy and efficiency. Attendees will gain insights into pre-processing 3D CT scans, training AI models, and solving industry challenges.", "description": "This talk delves into the application of deep learning to automate porosity detection in additive manufacturing (AM) components. 
Using convolutional neural networks (CNNs) and advanced image segmentation models, the session walks through the entire pipeline, from pre-processing 3D CT scan data to training and evaluating AI models, while addressing practical challenges like imbalanced datasets and computational costs.\n\nAs an informative and technical session, this talk demonstrates how AI can significantly enhance defect analysis, making quality control in AM faster, more accurate, and scalable. Attendees will leave with a clear understanding of the technical process, real-world applications, and the potential for AI to transform AM quality assurance.\n\nTime Outline:\n1.\tIntroduction (0-5 min) \u2013 AM overview, porosity challenges, limitations of manual analysis.\n2.\tDeep Learning for Porosity Detection (5-20 min) \u2013 CNNs, segmentation models, pre-processing.\n3.\tCase Study (20-25 min) \u2013 Real-world application, performance metrics, challenges.\n4.\tFuture Directions (25-30 min) \u2013 AI-driven quality control.\n\nThis talk is ideal for AI practitioners, engineers, and researchers, bridging deep learning with industrial defect detection. While no hands-on activities are included, references to open-source tools and datasets will be provided for interested attendees that want to explore.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GH3EFQ", "name": "Onyekachukwu Ojumah", "avatar": "https://cfp.pydata.org/media/avatars/GH3EFQ_2qOBb0F.webp", "biography": "Onyekachukwu Ojumah is an AI Engineer with a strong background in data analytics, cloud computing, and machine learning. She holds an MSc in Artificial Intelligence from the University of Huddersfield and a BSc in Computer Science from McPherson University, where she graduated as the Best Graduating Student.\nCurrently, Onyekachukwu works as an AI Engineer at Victorian Plumbing, where she designs and implements AI-driven solutions to optimize operational processes and drive business innovation. 
She has co-authored research papers on AI applications across various industries, exploring how AI can transform workflows and decision-making.\nAs the organizer of PyData Huddersfield, she leads a vibrant community of data professionals and enthusiasts, fostering collaboration and knowledge-sharing around AI and machine learning. Onyekachukwu has also spoken at notable events, including DataFest Africa and MLOps Lagos, where she shared insights on AI-driven data engineering, model optimization, and data strategies.", "public_name": "Onyekachukwu Ojumah", "guid": "f3593ad2-ac7d-531b-be66-1179b1475d1d", "url": "https://cfp.pydata.org/london2025/speaker/GH3EFQ/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/TLFMW3/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/TLFMW3/", "attachments": []}, {"guid": "a8833738-c7f7-598a-bbfb-62372a40811b", "code": "PDXDNQ", "id": 77074, "logo": null, "date": "2025-06-08T11:00:00+01:00", "start": "11:00", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77074-one-repo-to-rule-them-all-one-repo-to-bind-them-control-all-of-your-projects-with-copier", "url": "https://cfp.pydata.org/london2025/talk/PDXDNQ/", "title": "One repo to rule them all, one repo to bind them...Control all of your projects with copier!", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Did you know you can control all of your projects from a central template repository? In this talk we'll learn about copier, a framework for creating project templates. A natural successor to cookiecutter and GitHub templates, copier lets your projects re-sync from the original template, with new or the same arguments. Adopt the latest and greatest tools without leaving any of your libraries behind!", "description": "Developers love to work on new projects. Researchers love to experiment on new ideas. 
If you're anything like me, you have lots of little libraries for every new problem or idea that comes your way. And if you're like me, you also love keeping abreast of the latest-and-greatest tooling in the ever-changing Python ecosystem.\n\nMy approach for a new project has always been to copy/paste/find/replace my most recently used project as a template. This led to a predictable problem - every project evolved in small ways from the one before it. Travis became GitHub actions, flake8/black became ruff, my setup.pys were replaced by pyproject tomls...I created an unmaintainable mess of almost-immediately deprecated patterns! I tried to leverage cookiecutter and template repos with mixed success.\n\nInstead of making progress on any project, I was constantly bogged down amidst a perpetually updating ecosystem. \n\nIn this talk, I'll discuss the solution: copier. Copier lets you render projects from templates...and keep them in sync with upstream changes. With a few tweaks and a helpful GitHub action, you can control all of your projects from one central location.\n\nAdd a rust extension? No problem. New linter flags? Trivial! Accidentally mispelled your own name in 50+ public projects? 
In this talk, I'll show you how to pretend it never happened!", "recording_license": "", "do_not_record": false, "persons": [{"code": "TM3KV3", "name": "Tim Paine", "avatar": "https://cfp.pydata.org/media/avatars/TM3KV3_c6kvP3r.webp", "biography": "Tim is a Quantitative Developer at Cubist Systematic Strategies and an adjunct professor in the Computer Science Department at Columbia University.", "public_name": "Tim Paine", "guid": "2db096c1-98d8-57f5-90ac-450532c61f08", "url": "https://cfp.pydata.org/london2025/speaker/TM3KV3/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/PDXDNQ/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/PDXDNQ/", "attachments": []}, {"guid": "edc8f14d-a4ed-5b52-b3c4-c73e0e5808e7", "code": "DP77TK", "id": 77372, "logo": null, "date": "2025-06-08T11:45:00+01:00", "start": "11:45", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77372-git-commit-medtech-transformed-python-s-medical-robotics-breakthrough", "url": "https://cfp.pydata.org/london2025/talk/DP77TK/", "title": "Git Commit, MedTech Transformed: Python\u2019s Medical Robotics Breakthrough", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Code changing lives? Absolutely. We're diving into Python's power to deploy cutting-edge solutions for lung cancer diagnosis and treatment in medical and surgical robotics. Expect demos showcasing algorithms, data analysis, and real-world impact\u2014bridging MedTech innovation and life-changing solutions. Ready to see Python revolutionize lung health? Join us. Let's code a healthier future together!", "description": "This talk will provide a demo of a deep learning model developed using Python for lung nodule detection and classification in medical images. The model, built with 3D Convolutional Neural Networks (CNNs), is trained on public datasets (TCIA, LUNA16) and will be evaluated using metrics such as accuracy, sensitivity, specificity, and AUC-ROC. 
The talk will include:\n* Preprocessing and augmentation techniques used to handle medical image data.\n* An overview of the 3D CNN architecture and training process.\n* Visualizations of the model's output, showing detected and classified lung nodules.\n* A discussion of how this model could be integrated into a robotic-assisted bronchoscopy system, potentially using ROS, to guide instrument placement during biopsies.\n\nThe session will highlight the practical application of Python's libraries (TensorFlow/PyTorch, OpenCV, Scikit-learn) in medical image analysis and demonstrate how these techniques can contribute to advancements in lung cancer diagnosis and treatment within the MedTech industry.", "recording_license": "", "do_not_record": true, "persons": [{"code": "WWKB3X", "name": "Lilinoe Harbottle", "avatar": "https://cfp.pydata.org/media/avatars/WWKB3X_SiNHYPx.webp", "biography": "Lilinoe Harbottle is an Indigenous (K\u0101naka Maoli) Data Scientist passionate about revolutionizing healthcare with technology. At a startup based in San Francisco, she leads AI initiatives, developing cutting-edge models for autonomous systems and natural language processing. Her expertise in Python, SQL, and advanced analytics transforms data into actionable insights. Previously at Johnson & Johnson, she enhanced medical robotic systems, including real-time telemetry for bronchoscopy and urology procedures, improving efficiency. 
A champion for STEM inclusion, she is active in Google's Women Techmakers and the American Indian Science and Engineering Society (AISES).", "public_name": "Lilinoe Harbottle", "guid": "ef53a252-efaf-5a9e-a373-cfa3df2fce25", "url": "https://cfp.pydata.org/london2025/speaker/WWKB3X/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/DP77TK/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/DP77TK/", "attachments": []}, {"guid": "8f51d7a4-49af-5faa-936e-8020437b5469", "code": "QUNRWL", "id": 77839, "logo": null, "date": "2025-06-08T14:45:00+01:00", "start": "14:45", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77839-debugging-leadership-six-errors-when-moving-from-code-to-management", "url": "https://cfp.pydata.org/london2025/talk/QUNRWL/", "title": "Debugging Leadership: Six Errors when Moving From Code to Management", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Transitioning from a hands-on Pythonista to a leadership role is a journey filled with challenges, and like debugging code, it requires identifying, isolating, and fixing problems. In this talk, I\u2019ll share eight key lessons from my journey from Data Scientist to Co-Founder of a small software company, framed as Python errors.\n\nFrom battling imposter syndrome (ValueError: self-worth not defined), to learning to delegate (DeadlockError: unable to release control), and avoiding burnout (RuntimeError: system overload), this talk offers actionable advice for anyone navigating the leap from technical contributor to technical leader.\n\nExpect a mix of humour, relatable stories, and hard-won lessons as we explore how debugging leadership challenges is just as rewarding (and occasionally frustrating) as debugging code. 
Whether you\u2019re considering a leadership role or already on the journey, this session will leave you with practical insights to navigate common pitfalls and approach a leadership transition with a clearer understanding of what to expect.", "description": "Transitioning from a technical role to leadership is a unique challenge; it\u2019s no longer about writing clean code or optimizing performance but about empowering teams, making decisions, and balancing competing priorities. In this talk, I\u2019ll share eight key lessons from my own experience. I frame these lessons as Python errors that technical professionals will find relatable and actionable.\n\nKey Lessons:\n\nValueError: self-worth not defined: Imposter syndrome can hit hard when stepping into leadership. I\u2019ll share how I redefined my sense of value and impact beyond just writing code.\n\nDeadlockError: unable to release control: Delegation doesn\u2019t come naturally to many of us. I\u2019ll discuss how learning to let go and trust your team is critical for scaling your impact.\n\nKeyError: culture not found: Leadership isn\u2019t just about building great products; it\u2019s about building great teams. We\u2019ll explore how to create a culture where people can thrive.\n\nAttributeError: clear_message not found: Communication is the cornerstone of effective leadership. I\u2019ll share how I developed this skill to articulate vision, handle negotiations, and navigate tough conversations.\n\nTypeError: instant_gratification is not callable: Unlike coding, leadership rarely provides quick wins. I\u2019ll explain how to find satisfaction in long-term progress and team success.\n\nUnhandledImpactError: cascading effects detected: How you show up as a leader has a ripple effect on your team and clients. I\u2019ll discuss how to be intentional about your presence and its impact.\n\nDependencyError: support module not imported: You can\u2019t do it alone. 
I\u2019ll share the value of building a support network of mentors, peers, and advisors.\n\nRuntimeError: system overload: Burnout is real, and leadership can amplify it if you\u2019re not careful. We\u2019ll explore strategies to prioritize your own well-being as a leader.\n\nWhat You\u2019ll Gain:\n\nThis session is designed to help technical professionals better understand the realities of leadership transitions. By framing common challenges as Python errors, it provides a relatable and engaging way to explore the pitfalls and opportunities of stepping into leadership. Attendees will leave with:\nKnowledge of what they\u2019re getting themselves into and the challenges they might face.\nLessons learned from my personal experiences of transitioning to leadership.\nTools and advice to approach leadership transitions with practical strategies and a grounded perspective.\n\nWhether you\u2019re contemplating a leadership role or already on the journey, this talk will provide valuable lessons to help you lead with purpose and avoid common mistakes along the way.", "recording_license": "", "do_not_record": false, "persons": [{"code": "AUHQNG", "name": "Matt Upson", "avatar": "https://cfp.pydata.org/media/avatars/AUHQNG_6kKlj2u.webp", "biography": "Dr. Matthew Upson is Co-Founder of Mantis NLP, an AI consultancy specializing in delivering impact through natural language processing and generative AI. With over a decade of experience in data science and software development, he has worked across academia, government, and industry to build innovative AI solutions.\n\nAt the UK Government Digital Service (GDS), Matt contributed to some of the first applications of AI for GOV.UK, and developed an approach to automating workflows \"Reproducible Analytical Pipelines\" which is now widely used across government. 
Matt is a founding member of the Data Science Section of the Royal Statistical Society, and a fellow of the Software Sustainability Institute.\n\nMatt lives in Valencia, Spain and is a dedicated (very) amateur triathlete and former bushcraft instructor who enjoys connecting with nature. His talks combine technical expertise with personal stories, in a humorous and informal way.", "public_name": "Matt Upson", "guid": "5fe9aa01-fdd1-5d79-a69e-ffc984c5fbcd", "url": "https://cfp.pydata.org/london2025/speaker/AUHQNG/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/QUNRWL/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/QUNRWL/", "attachments": []}, {"guid": "939f0348-e71c-58c4-b77e-aa682bed7956", "code": "FM3UCY", "id": 77380, "logo": null, "date": "2025-06-08T15:30:00+01:00", "start": "15:30", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77380-building-a-knowledge-graph-for-climate-policy", "url": "https://cfp.pydata.org/london2025/talk/FM3UCY/", "title": "Building a knowledge graph for climate policy", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "At Climate Policy Radar, we're building an open-source knowledge graph for climate policy. In this talk, we'll share how we combine in-house expertise with scalable data infrastructure to identify key concepts in thousands of global climate policy documents. 
We'll also touch on ontology design, equitable evaluation, and the climate impacts of AI.", "description": "We'll take you on a technical deep-dive into how we've built and scaled a knowledge graph which maps the relationships between thousands of climate policy concepts, and identifies where those concepts appear in our corpus of climate policy and other climate-relevant documents.\n\nWe'll share the high-level methodology, infrastructure decisions, and evaluation framework which have allowed our small team to process millions of passages of text while maintaining high standards for fairness and accuracy.\n\nAfter covering the basics of what a knowledge graph is, and why you might want to build one, we'll cover:\n\n1. **Knowledge Graph Architecture & Methodology**\n   - An ontology which can handle the complexity of the climate policy domain\n   - Interoperability considerations with existing sub-domain taxonomies\n   - Why we're building in the open with Wikibase\n   - The value of real human expertise\n\n2. **Classifier Development & Evaluation**\n   - A common model for classifiers, which can encompass a range of architectures from straightforward regexes, to fine-tuned BERT-based models, to optimised calls to third-party LLMs\n   - Sampling strategies for building representative evaluation datasets\n   - Quantitative metrics vs qualitative vibe-checks for classifier selection\n\n3. 
**Production Infrastructure & Scaling**\n   - A modular pipeline design separating model management, inference, and indexing\n   - Prefect-based orchestration for distributed inference\n   - Infrastructure as code with Pulumi\n   - Planned integration with our existing search and RAG systems\n\nThe audience should leave the talk with a clear understanding of:\n\n- Practical considerations when building domain-specific, high-impact knowledge graphs\n- Methods for evaluating NLP classifier performance in technical domains\n- Approaches to scaling inference pipelines, from local experimentation to routine cloud-based deployments\n- How we plan to use our knowledge graph to power a climate policy research platform, including integrations with RAG and other LLM-driven systems\n\nThis talk should be particularly stimulating for data scientists and engineers working on information retrieval systems, knowledge graphs, or other high-impact natural language processing systems.", "recording_license": "", "do_not_record": false, "persons": [{"code": "VSLDWP", "name": "Harrison Pim", "avatar": "https://cfp.pydata.org/media/avatars/VSLDWP_p9XG7Qj.webp", "biography": "I'm a data scientist / machine learning engineer with a background in computational / quantum physics. 
I write loads of python and typescript, and a little bit of everything else.\n\nI like working on hard R&D problems involving computer vision, natural language processing, graph theory, representation learning, recommendation systems, and information retrieval.\n\nI love turning those research projects into end-to-end pipelines and services which help people in the real world.", "public_name": "Harrison Pim", "guid": "f7cb0e26-d0fe-55d9-bd14-7530ef19c626", "url": "https://cfp.pydata.org/london2025/speaker/VSLDWP/"}, {"code": "PRTTM8", "name": "Fred O'Loughlin", "avatar": "https://cfp.pydata.org/media/avatars/PRTTM8_9OBRObs.webp", "biography": null, "public_name": "Fred O'Loughlin", "guid": "a86595eb-734a-5b50-931f-0864e00837e6", "url": "https://cfp.pydata.org/london2025/speaker/PRTTM8/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/FM3UCY/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/FM3UCY/", "attachments": []}, {"guid": "6211352d-c04f-572b-aa4f-d5a2920427d7", "code": "PGTEWH", "id": 77492, "logo": null, "date": "2025-06-08T16:15:00+01:00", "start": "16:15", "duration": "00:45", "room": "Hardwick Hub", "slug": "london2025-77492-transfer-learning-leveraging-pretrained-models-with-limited-data", "url": "https://cfp.pydata.org/london2025/talk/PGTEWH/", "title": "Transfer Learning: Leveraging Pretrained Models with Limited Data", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "Transfer learning has revolutionised machine learning by enabling models trained on large datasets to generalise effectively to tasks with limited data. This talk explores strategies for adapting pretrained models to new domains, focusing on audio processing as a case study. Using YAMNet, Whisper, and wav2vec2 for laughter detection, we demonstrate how to extract meaningful representations, fine-tune models efficiently, and handle severe class imbalances. 
The session covers feature extraction, model fusion techniques, and best practices for optimising performance in data-scarce environments. Attendees will gain practical insights into applying transfer learning across various modalities beyond audio, maximising model effectiveness when labelled data is scarce.", "description": "This talk provides a comprehensive exploration of transfer learning, focusing on how pretrained models can be leveraged for tasks with limited labelled data. It begins with an introduction to the core principles of transfer learning, covering different strategies such as feature extraction, fine-tuning, and domain adaptation. The session then delves into the benefits and challenges of using pretrained models, helping attendees understand when and how to apply these techniques effectively.\nWe will discuss how to choose and adapt pretrained models, with a specific focus on YAMNet, Whisper, and wav2vec2 for audio processing. The talk will cover strategies for handling limited data and severe class imbalance, including data augmentation, synthetic data generation, and advanced loss functions. Attendees will gain insights into fine-tuning techniques, such as layer-wise training and regularisation, to optimise model performance while preventing overfitting. A case study on laughter detection will illustrate these concepts in practice, demonstrating how multiple models can be combined for improved accuracy. Finally, we will explore applications beyond audio, including transfer learning in NLP and computer vision, highlighting cross-domain adaptation techniques and emerging trends in multimodal AI.", "recording_license": "", "do_not_record": false, "persons": [{"code": "W8M37R", "name": "Salman Khan", "avatar": "https://cfp.pydata.org/media/avatars/W8M37R_p0y8S2l.webp", "biography": "Salman Khan is the Director of Data Science at Afiniti, where he drives innovative solutions to complex business challenges through data science. 
With a specialization in machine learning, statistical modelling, and a strong focus on generative AI, Salman leads multiple teams of data scientists and engineers in the development and deployment of cutting-edge AI-driven applications. Salman has led AI projects delivering measurable business value, including real-time prediction systems, advanced language models, semantic search platforms, and generative AI applications. Salman\u2019s expertise spans deep learning, probabilistic modelling, and a broad range of data science techniques, with advanced proficiency in Python, R, and SQL.", "public_name": "Salman Khan", "guid": "835208f8-5e79-5026-9d5e-ff6b288f8957", "url": "https://cfp.pydata.org/london2025/speaker/W8M37R/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/PGTEWH/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/PGTEWH/", "attachments": []}], "Library": [{"guid": "b27b1efb-e9d6-5d66-81ab-1430f6e234b3", "code": "TCFWVY", "id": 77503, "logo": null, "date": "2025-06-08T11:45:00+01:00", "start": "11:45", "duration": "00:45", "room": "Library", "slug": "london2025-77503-leaders-at-pydata", "url": "https://cfp.pydata.org/london2025/talk/TCFWVY/", "title": "Leaders at PyData", "subtitle": "", "track": null, "type": "Talk", "language": "en", "abstract": "A self-organised workshop for data leaders to discuss the opportunity and challenges they face with their peers. This is the 9th iteration at a PyData conference. Questions are raised and answered by attendees, it is facilitated by Ian Ozsvald (PyDataLondon co-founder). You are encouraged to carry on talking to fellow leaders after this session, Ian will give out badges to help with this.\n\nThe format is based on the Breakout discussions that Ian uses in his private RebelAI leadership group, you're welcome and encouraged to copy and use it in your own organisations. 
Typical attendance is 60+ leaders.\n\nThe 2022 session using a different format (\"Executives at PyData\" as it was known) was written up, you can see it here: https://numfocus.medium.com/executives-at-pydata-global-2022-193cbc2d3f3b", "description": "A self-organised workshop for data leaders to discuss the opportunity and challenges they face with their peers. This is the 9th iteration at a PyData conference. Questions are raised and answered by attendees, it is facilitated by Ian Ozsvald (PyDataLondon co-founder). You are encouraged to carry on talking to fellow leaders after this session, Ian will give out badges to help with this.\n\nThe format is based on the Breakout discussions that Ian uses in his private RebelAI leadership group, you're welcome and encouraged to copy and use it in your own organisations. Typical attendance is 60+ leaders.\n\nThe 2022 session using a different format (\"Executives at PyData\" as it was known) was written up, you can see it here: https://numfocus.medium.com/executives-at-pydata-global-2022-193cbc2d3f3b", "recording_license": "", "do_not_record": false, "persons": [{"code": "QPPQER", "name": "Ian Ozsvald", "avatar": "https://cfp.pydata.org/media/avatars/QPPQER_PgOYnuf.webp", "biography": "Ian is a Chief Data Scientist, founder of the RebelAI leadership community, has co-founded and built the annual PyDataLondon conference raising $100k+ annually for the open source movement along with the associated 14,000+ member monthly meetup. Using data science he's helped clients find $2M in recoverable fraud, created the core IP which opened funding rounds for automated recruitment start-ups and diagnosed how major media companies can better supply recommendations to viewers. He gives conference talks internationally often as keynote speaker and is the author of the bestselling O'Reilly book High Performance Python (3rd edition). He has over 26 years of experience as a senior data science leader, trainer and team coach. 
For fun he's walked by his high-energy Springer Spaniel, surfs the Cornish coast and drinks fine coffee. Past talks and articles can be found at: \n\n* https://www.linkedin.com/in/ianozsvald/\n* https://ianozsvald.com/\n* https://notanumber.email/\n* https://github.com/ianozsvald/\n* https://twitter.com/ianozsvald", "public_name": "Ian Ozsvald", "guid": "325a0c0d-2fce-5f52-b006-bde853e03734", "url": "https://cfp.pydata.org/london2025/speaker/QPPQER/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/TCFWVY/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/TCFWVY/", "attachments": []}, {"guid": "f7eb9fec-c70d-58e9-94d9-25d75199f006", "code": "HQH7DY", "id": 77581, "logo": null, "date": "2025-06-08T14:45:00+01:00", "start": "14:45", "duration": "01:30", "room": "Library", "slug": "london2025-77581-humble-data-workshop", "url": "https://cfp.pydata.org/london2025/talk/HQH7DY/", "title": "Humble Data Workshop", "subtitle": "", "track": null, "type": "Tutorial", "language": "en", "abstract": "Learn Python for Data Science in this Beginners\u2019 Day Workshop. Would you like to learn to code but don\u2019t know where to start? Taking your first steps in programming can seem like an impossible task so we\u2019ve decided to put on a workshop to show beginners how it can be done and share our passion for the world of data science! \n\nApply to be a student https://forms.gle/2cvNyRK8c8pNnpnz5", "description": "We invite those from under-represented groups in data to apply to join us at PyData London (8th June 2025) Humble Data Workshop. In this workshop, you will learn the basics of programming in Python, as well as how to use tools such as Jupyter Notebook to analyse data.\n\nWe wish to be able to run a workshop with as many participants as we can accommodate online, however, we also need a lot of mentors to help out. 
Being a mentor not only helps you to familiarise yourself with your knowledge in data science, it also gives you a good vibe afterwards. If you have the skill to share, we are happy to welcome you to our Humble Data family. \n\nApply to be a mentor https://forms.gle/2cvNyRK8c8pNnpnz5", "recording_license": "", "do_not_record": false, "persons": [{"code": "DYTMCM", "name": "Hugh Evans", "avatar": "https://cfp.pydata.org/media/avatars/DYTMCM_Kd5bXgn.webp", "biography": "Hi, I'm Hugh.\n\nI'm an experienced developer advocate and community manager with a particular interest in data and AI. I have been working in IT for over 5 years, working on large scale software projects and cloud infrastructure.\n\nOut of office hours I organise AI and Deep Learning for Enterprise (aidle.uk), a Meetup group which hosts talks on real world applications of AI.\n\nI'm a former apprentice and an advocate for vocational learning as a pathway into an IT career.", "public_name": "Hugh Evans", "guid": "a70f8108-e7a1-58b3-96cd-930420678b42", "url": "https://cfp.pydata.org/london2025/speaker/DYTMCM/"}], "links": [], "feedback_url": "https://cfp.pydata.org/london2025/talk/HQH7DY/feedback/", "origin_url": "https://cfp.pydata.org/london2025/talk/HQH7DY/", "attachments": []}]}}]}}}