A helpful trick: the @__DIR__ macro gives us the directory of the current file (the @__FILE__ macro gives us the full file path). In Jupyter it's equivalent to pwd()
@__DIR__
"/Users/blaschke/Developer/hpc-julia/docs/julia for data science/01_data"
Let's point the notebooks to our data source:
data_directory = joinpath(@__DIR__, "..", "..", "..", "exercises", "covid", "data")
"/Users/blaschke/Developer/hpc-julia/docs/julia for data science/01_data/../../../exercises/covid/data"
Let's make a temporary working directory (i.e. a scratch space):
temp_directory = mktempdir()
"/var/folders/gy/fk8y1bkd5b78l0n687jwhzkc0029yh/T/jl_H93q4P"
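As an aside, mktempdir also accepts a function, in which case the directory is deleted automatically when the block exits. A minimal sketch (the file name scratch.txt is just for illustration):

```julia
# The do-block variant cleans up the directory automatically
# once the block returns (or throws):
mktempdir() do dir
    write(joinpath(dir, "scratch.txt"), "temporary data")
    println(readdir(dir))  # the file exists inside the block
end
# here the directory and its contents are already gone
```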
This is really basic stuff -- you won't need to do this often at all, but it's good to see nonetheless.
First, let's make sure that the target directory is empty:
readdir(temp_directory)
String[]
Now we write a list of integers to a.dat in our temp_directory:
file_path = joinpath(temp_directory, "a.dat")
"/var/folders/gy/fk8y1bkd5b78l0n687jwhzkc0029yh/T/jl_H93q4P/a.dat"
a = [4, 2, 3]
3-element Vector{Int64}:
4
2
3
write(file_path, a)
24
The 24 returned by write is the number of bytes written -- three Int64 values at 8 bytes each. There it is:
readdir(temp_directory)
1-element Vector{String}:
"a.dat"
Let's read it:
read(file_path, Int64)
4
Shoot! That only read the first integer! The problem is that read doesn't know where to stop, so it reads exactly one value of the type you told it (Int64). For comparison, let's interpret the whole file as a string:
read(file_path, String)
"\x04\0\0\0\0\0\0\0\x02\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0"
So to read an array of data, we need a loop that checks whether we've reached the end of the file:
open(file_path, "r") do io
    while !eof(io)
        print(read(io, Int64), ",")
    end
end
4,2,3,
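If you'd rather avoid the loop, one alternative is to read the whole file as a byte vector and reinterpret those bytes as 64-bit integers. A sketch, assuming the file length is a multiple of 8 bytes and was written in the machine's native byte order:

```julia
# read(path) with no type returns a Vector{UInt8};
# reinterpret views those bytes as Int64 values without copying
bytes = read(file_path)
collect(reinterpret(Int64, bytes))
```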
JSON is a really popular format for structured data. It's a bit clunky, but it's human-readable, and pretty much every web API uses it, so we'll just have to live with the clunk. Check out the documentation here: https://github.com/JuliaIO/JSON.jl
using JSON
Let's make a basic structured data object -- e.g. a dictionary:
d = Dict(
"a"=>1,
"b"=>"hello"
)
Dict{String, Any} with 2 entries:
"b" => "hello"
"a" => 1
Which can be encoded as a JSON:
json(d)
"{\"b\":\"hello\",\"a\":1}"
Which is just a string -- so we can read and write it as a string:
json_file_path = joinpath(temp_directory, "d.json")
write(json_file_path, json(d))
19
Which is just a string in a file, so we can read it back as a string and parse it:
d_string = read(json_file_path, String)
JSON.parse(d_string)
Dict{String, Any} with 2 entries:
"b" => "hello"
"a" => 1
CSV is another text-based format, and the de facto standard for sharing small to medium amounts of data. Check out the documentation here: https://csv.juliadata.org/stable/ and https://dataframes.juliadata.org/stable/
using DataFrames
using CSV
CSV is great for tabular data! And DataFrames are (imo) the best way to program against tabular data:
df = DataFrame(name=String[], age=Float64[], coffees=Int64[])
0 rows × 3 columns
| | name | age | coffees |
|---|---|---|---|
| | String | Float64 | Int64 |
Let's start adding data to our dataframe:
push!(df, ("Johannes", 36.5, 10))
1 rows × 3 columns
| | name | age | coffees |
|---|---|---|---|
| | String | Float64 | Int64 |
| 1 | Johannes | 36.5 | 10 |
push!(df, ("Christin", 34.1, 2))
2 rows × 3 columns
| | name | age | coffees |
|---|---|---|---|
| | String | Float64 | Int64 |
| 1 | Johannes | 36.5 | 10 |
| 2 | Christin | 34.1 | 2 |
Which we can now save to disk (again as text):
coffee_file_path = joinpath(temp_directory, "coffee.csv")
CSV.write(coffee_file_path, df)
"/var/folders/gy/fk8y1bkd5b78l0n687jwhzkc0029yh/T/jl_H93q4P/coffee.csv"
readdir(temp_directory)
3-element Vector{String}:
"a.dat"
"coffee.csv"
"d.json"
Let's look at the CSV file's content:
open(joinpath(temp_directory, "coffee.csv")) do io
    for line in readlines(io)
        println(line)
    end
end
name,age,coffees
Johannes,36.5,10
Christin,34.1,2
Loading a DataFrame from disk involves first creating a CSV.File object and then piping it into a DataFrame:
CSV.File(coffee_file_path)
2-element CSV.File:
CSV.Row: (name = "Johannes", age = 36.5, coffees = 10)
CSV.Row: (name = "Christin", age = 34.1, coffees = 2)
CSV.File(coffee_file_path) |> DataFrame
2 rows × 3 columns
| | name | age | coffees |
|---|---|---|---|
| | String15 | Float64 | Int64 |
| 1 | Johannes | 36.5 | 10 |
| 2 | Christin | 34.1 | 2 |
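The pipe through DataFrame is common enough that CSV.jl provides a shortcut: CSV.read takes the sink type as its second argument. A sketch, reusing coffee_file_path from above:

```julia
# Equivalent to CSV.File(coffee_file_path) |> DataFrame, in one call
df2 = CSV.read(coffee_file_path, DataFrame)
```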
Let's read a more hefty data source:
readdir(data_directory)
1-element Vector{String}:
"total-covid-cases-deaths-per-million.csv"
Which we can now read into a single DataFrame:
data = CSV.File(joinpath(data_directory, "total-covid-cases-deaths-per-million.csv")) |> DataFrame
dropmissing!(data) # Data is never perfect!
141,467 rows × 5 columns (omitted printing of 1 columns)
| | Entity | Code | Day | Total confirmed deaths due to COVID-19 per million people |
|---|---|---|---|---|
| | String | String15 | Date | Float64 |
| 1 | Afghanistan | AFG | 2020-03-23 | 0.025 |
| 2 | Afghanistan | AFG | 2020-03-24 | 0.025 |
| 3 | Afghanistan | AFG | 2020-03-25 | 0.025 |
| 4 | Afghanistan | AFG | 2020-03-26 | 0.05 |
| 5 | Afghanistan | AFG | 2020-03-27 | 0.05 |
| 6 | Afghanistan | AFG | 2020-03-28 | 0.05 |
| 7 | Afghanistan | AFG | 2020-03-29 | 0.1 |
| 8 | Afghanistan | AFG | 2020-03-30 | 0.1 |
| 9 | Afghanistan | AFG | 2020-03-31 | 0.1 |
| 10 | Afghanistan | AFG | 2020-04-01 | 0.1 |
| 11 | Afghanistan | AFG | 2020-04-02 | 0.1 |
| 12 | Afghanistan | AFG | 2020-04-03 | 0.126 |
| 13 | Afghanistan | AFG | 2020-04-04 | 0.126 |
| 14 | Afghanistan | AFG | 2020-04-05 | 0.176 |
| 15 | Afghanistan | AFG | 2020-04-06 | 0.176 |
| 16 | Afghanistan | AFG | 2020-04-07 | 0.276 |
| 17 | Afghanistan | AFG | 2020-04-08 | 0.351 |
| 18 | Afghanistan | AFG | 2020-04-09 | 0.377 |
| 19 | Afghanistan | AFG | 2020-04-10 | 0.377 |
| 20 | Afghanistan | AFG | 2020-04-11 | 0.377 |
| 21 | Afghanistan | AFG | 2020-04-12 | 0.452 |
| 22 | Afghanistan | AFG | 2020-04-13 | 0.477 |
| 23 | Afghanistan | AFG | 2020-04-14 | 0.552 |
| 24 | Afghanistan | AFG | 2020-04-15 | 0.628 |
| 25 | Afghanistan | AFG | 2020-04-16 | 0.728 |
| 26 | Afghanistan | AFG | 2020-04-17 | 0.753 |
| 27 | Afghanistan | AFG | 2020-04-18 | 0.753 |
| 28 | Afghanistan | AFG | 2020-04-19 | 0.753 |
| 29 | Afghanistan | AFG | 2020-04-20 | 0.828 |
| 30 | Afghanistan | AFG | 2020-04-21 | 0.904 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
describe gives us a high-level overview:
describe(data)
5 rows × 7 columns (omitted printing of 4 columns)
| | variable | mean | min |
|---|---|---|---|
| | Symbol | Union… | Any |
| 1 | Entity | | Afghanistan |
| 2 | Code | | ABW |
| 3 | Day | | 2020-01-22 |
| 4 | Total confirmed deaths due to COVID-19 per million people | 527.4 | 0.001 |
| 5 | Total confirmed cases of COVID-19 per million people | 36548.1 | 0.018 |
Let's say we want to extract a column:
data[:, "Total confirmed cases of COVID-19 per million people"]
141467-element Vector{Float64}:
1.004
1.054
1.858
2.008
2.284
2.661
2.862
2.862
4.167
4.82
5.899
6.753
6.778
⋮
16168.118
16197.272
16197.272
16212.711
16212.711
16230.799
16246.437
16276.32
16276.32
16287.915
16295.005
16302.625
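Note that df[:, col] returns a copy of the column. DataFrames offers a few access styles; a quick sketch of the common ones:

```julia
col = "Total confirmed cases of COVID-19 per million people"
data[:, col]   # a copy of the column
data[!, col]   # the underlying vector, no copy (mutating it changes the DataFrame)
data.Entity    # property syntax, for column names that are valid identifiers
```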
Let's extract only those rows matching a country code:
data[data[:, "Code"] .== "USA", :]
760 rows × 5 columns (omitted printing of 1 columns)
| | Entity | Code | Day | Total confirmed deaths due to COVID-19 per million people |
|---|---|---|---|---|
| | String | String15 | Date | Float64 |
| 1 | United States | USA | 2020-02-29 | 0.003 |
| 2 | United States | USA | 2020-03-01 | 0.003 |
| 3 | United States | USA | 2020-03-02 | 0.018 |
| 4 | United States | USA | 2020-03-03 | 0.021 |
| 5 | United States | USA | 2020-03-04 | 0.033 |
| 6 | United States | USA | 2020-03-05 | 0.036 |
| 7 | United States | USA | 2020-03-06 | 0.042 |
| 8 | United States | USA | 2020-03-07 | 0.051 |
| 9 | United States | USA | 2020-03-08 | 0.063 |
| 10 | United States | USA | 2020-03-09 | 0.066 |
| 11 | United States | USA | 2020-03-10 | 0.084 |
| 12 | United States | USA | 2020-03-11 | 0.099 |
| 13 | United States | USA | 2020-03-12 | 0.129 |
| 14 | United States | USA | 2020-03-13 | 0.153 |
| 15 | United States | USA | 2020-03-14 | 0.174 |
| 16 | United States | USA | 2020-03-15 | 0.21 |
| 17 | United States | USA | 2020-03-16 | 0.291 |
| 18 | United States | USA | 2020-03-17 | 0.403 |
| 19 | United States | USA | 2020-03-18 | 0.583 |
| 20 | United States | USA | 2020-03-19 | 0.799 |
| 21 | United States | USA | 2020-03-20 | 1.117 |
| 22 | United States | USA | 2020-03-21 | 1.427 |
| 23 | United States | USA | 2020-03-22 | 1.811 |
| 24 | United States | USA | 2020-03-23 | 2.373 |
| 25 | United States | USA | 2020-03-24 | 3.103 |
| 26 | United States | USA | 2020-03-25 | 4.103 |
| 27 | United States | USA | 2020-03-26 | 5.356 |
| 28 | United States | USA | 2020-03-27 | 6.924 |
| 29 | United States | USA | 2020-03-28 | 9.065 |
| 30 | United States | USA | 2020-03-29 | 10.732 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
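Boolean-mask indexing is one option; filter expresses the same row selection a bit more declaratively. A sketch, reusing data from above:

```julia
# filter keeps the rows for which the predicate returns true;
# each `row` behaves like a named tuple of that row's values
usa = filter(row -> row.Code == "USA", data)
```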