CentraleSupélec — LMF, UMR CNRS 9021
Département informatique — Laboratoire Méthodes Formelles
Bât Breguet, 3 rue Joliot-Curie — Bât 650 Ada Lovelace, Université Paris Sud
91190 Gif-sur-Yvette, France — Rue Noetzlin, 91190 Gif-sur-Yvette, France
TD n°4 et 5 SIP

Le sujet SIP-TD04-05.pdf

Éléments de corrigé

Programme de test

Ce programme suppose que le code est placé dans un fichier "td4.py".

import pytest

import td4

def test_read_float():
    """Check that td4.read_float parses amounts written in various styles."""
    # Raw text as found in the CSV file -> expected numeric value
    expectations = [
        ("1 234,56€", 1234.56),
        ("100.20", 100.20),
        ("£200.65", 200.65),
    ]
    for text, value in expectations:
        assert td4.read_float(text) == value

Réponse aux questions

#############
# Question 1
#############
# Voir https://fr.wikipedia.org/wiki/Comma-separated_values

#############
# Question 2
#############
# Les fichiers Excel sont très populaires pour traiter de petits problèmes
# comme celui-ci, et l'export en CSV est une façon simple de récupérer les données.
# Une base de données demande un travail de conception plus important qui n'est peut-être 
# pas justifié ici.

##################
# Questions 3 et 4
##################
import csv
import datetime
import doctest

def open_csv(filename):
    """Print the content of a semicolon-separated CSV file.

    Each row of the file is printed on its own line, every field being
    followed by ``" | "``.

    If the file cannot be opened or read (OSError), no exception is
    propagated: a message giving the exception type and the file name is
    printed instead.
    """
    try:
        # newline="" lets the csv module do its own line-ending handling,
        # as recommended by the csv documentation.
        with open(filename, newline="") as csvfile:
            for row in csv.reader(csvfile, delimiter=";"):
                for field in row:
                    print(field, '| ', end='')
                print()
    except OSError as err:
        # Single-quoted literal so the double quotes around the file name
        # are part of the message instead of terminating the string.
        print('Error {} "{}"'.format(type(err).__name__, filename))

#############
# Question 5
#############
# Il suffit de remplacer la ligne 'print("Error {} ...)' par 'raise NameError("The file cannot be read")'

#############
# Question 6
#############
# Nous n'utiliserons pas de classe ici. Un simple dictionnaire suffira.

##################
# Questions 7 à 16
##################
# Lecture d'une date au format jour/mois/année
def read_date(s):
    """Parse a ``day/month/year`` string (4-digit year) into a datetime.

    A ValueError is raised by ``strptime`` when ``s`` does not match.
    """
    day_month_year = "%d/%m/%Y"
    parsed = datetime.datetime.strptime(s, day_month_year)
    return parsed

# Lecture d'un flottant en supprimant les caractères indésirables
# et en transformant les virgules en points.
def read_float(s):
    """
    Convert a string into a float, ignoring unwanted characters.

    Digits and dots are kept and commas are turned into decimal points.
    A minus sign found before the first kept character makes the result
    negative. Every other character (spaces, currency symbols, ...) is
    dropped.

    Raises ValueError when what remains is not a valid number (for
    instance when the string contains no digit at all).

    >>> read_float("1 234,5€")
    1234.5
    >>> read_float("-12,5 €")
    -12.5
    """
    kept = []
    negative = False
    for c in s:
        if c in "0123456789.":
            kept.append(c)
        elif c == ',':
            kept.append('.')
        elif c == '-' and not kept:
            # Only a sign placed before the number counts; a dash appearing
            # after digits is treated as noise, as before.
            negative = True
    value = float("".join(kept))
    return -value if negative else value

# Lecture d'un booléen
def read_bool(s):
    """Tell whether a string spells an affirmative answer.

    The comparison ignores case; any string that is not one of the
    recognised affirmative words yields False.
    """
    affirmative = ("true", "yes", "1", "ok", "oui")
    lowered = s.lower()
    return lowered in affirmative

# Chargement des données depuis le fichier CSV
def load_data(filename):
    """Load the students described in a semicolon-separated CSV file.

    The first row holds the column names; they are lower-cased and used as
    dictionary keys. Values are converted according to the column position:
    index 3 with read_float, index 4 with read_bool, index 5 with
    read_date; every other column is kept as a string.

    Completely empty rows are skipped and an empty file yields an empty
    list.

    Returns the list of student dictionaries, or None when the file cannot
    be opened or read (an error message is printed in that case).
    """
    try:
        students = []
        # newline="" lets the csv module do its own line-ending handling,
        # as recommended by the csv documentation.
        with open(filename, newline="") as csvfile:
            file_reader = csv.reader(csvfile, delimiter=";")
            # Default value: an empty file has no header row at all
            headers = [item.lower() for item in next(file_reader, [])]
            for line in file_reader:
                if not line:
                    # csv.reader yields [] for a blank line
                    continue
                student = {}
                for i, header in enumerate(headers):
                    value = line[i]
                    if i == 3:
                        value = read_float(value)
                    elif i == 4:
                        value = read_bool(value)
                    elif i == 5:
                        value = read_date(value)
                    student[header] = value
                students.append(student)
        return students
    except OSError as err:
        # Single-quoted literal so the double quotes around the file name
        # are part of the message instead of terminating the string.
        print('Error {} "{}"'.format(type(err).__name__, filename))
        return None


##############
# Question 17
##############
def remove_duplicates(data):
    """Return the entries of ``data`` with at most one entry per e-mail.

    Entries without a "mail" key, or with an empty one, are reported on
    standard output and always kept. When several entries share the same
    e-mail address, the one with the smallest "registration_date" is kept,
    at the position where that address first appeared.
    """
    result = []
    position_of = {}  # e-mail address -> index of its entry in result
    for entry in data:
        has_mail = "mail" in entry and len(entry["mail"]) != 0
        if not has_mail:
            print("No email in " + str(entry))
            result.append(entry)
            continue

        address = entry["mail"]
        position = position_of.get(address)
        if position is None:
            # First occurrence: remember where this address is stored
            position_of[address] = len(result)
            result.append(entry)
        elif entry["registration_date"] < result[position]["registration_date"]:
            # Same address registered earlier: it supersedes the stored entry
            result[position] = entry
    return result

##############
# Question 18
##############
import operator

def remove_participants(students, n):
    """Return the ``n`` first students in priority order.

    Students whose 'paid' value is true come first, followed by those who
    have not paid yet; each group is ordered by increasing
    'registration_date'.
    """
    by_date = operator.itemgetter('registration_date')
    settled = sorted((s for s in students if s['paid']), key=by_date)
    pending = sorted((s for s in students if not s['paid']), key=by_date)
    ranking = settled + pending
    return ranking[:n]

# When this file is executed directly as a script, run the examples found
# in the docstrings and print the result returned by doctest.
if __name__ == '__main__':
    outcome = doctest.testmod()
    print(outcome)