# Step 1: Load the hourly weather observations from "data2020.csv".
# The ten columns are renamed positionally while reading; the visible part of
# this script only goes on to use "DB" and "RH".
# NOTE(review): read.csv() is called with its default check.names = TRUE, so
# the "WIND-DIR" label ends up stored as "WIND.DIR" in the data frame.
df <- read.csv(
  "data2020.csv",
  col.names = c(
    "DateTime", "DB", "RH", "DP", "P",
    "WIND", "WIND-DIR", "SUN", "RAIN", "START"
  )
)

# Discard every row that has a missing value in any of the columns.
df <- df[complete.cases(df), ]

# Rescale the raw readings.
# NOTE(review): presumably DB arrives in tenths of a degree, RH in percent and
# P in hPa (converted here to degrees, a 0-1 fraction and Pa) -- confirm the
# units against the data source.
df[["DB"]] <- df[["DB"]] / 10
df[["RH"]] <- df[["RH"]] / 100
df[["P"]] <- df[["P"]] * 100
# Step 2: Count the hourly records available in `df` (one row per record that
# survived the NA filtering) and report the figure.
total_hours <- nrow(df)
print(total_hours)

# Report how many distinct (DB, RH) pairs occur in the data.
# Wrapped in print() so the value is also shown when the script is run through
# source(), which does not auto-print bare top-level expressions.
print(nrow(unique(df[, c("DB", "RH")])))
# Input: `df`, one row per hourly record, with numeric columns "DB" (dry-bulb
# temperature) and "RH" (relative humidity) and no missing values.

# Step 2: Number of hourly records; the denominator for the percentages below.
total_hours <- nrow(df)

# Step 3: For every row, count the records whose DB is strictly greater than
# that row's DB. With ties.method = "max", rank() returns the number of records
# with DB <= the row's DB, so the remainder is the strictly-greater count.
# This replaces a per-row rescan of the whole column (O(n^2)).
hours_exceeded <- total_hours - rank(df$DB, ties.method = "max")

# Step 4: For every row, the mean RH over all records sharing exactly that DB
# value. Grouping goes through an integer index from match() so that DB values
# are compared exactly rather than via their printed representation.
avg_rh <- ave(df$RH, match(df$DB, unique(df$DB)))

# Step 5: Assemble the result, one row per record of `df`:
#   DB              - dry-bulb temperature of the record
#   RH              - mean RH over all records with the same DB
#   pct_exceeded    - percentage of records with a strictly higher DB
#   RH_pct_exceeded - mean RH for the record's pct_exceeded value
# pct_exceeded is a strictly decreasing function of DB, so grouping by
# pct_exceeded is the same as grouping by DB; RH_pct_exceeded therefore equals
# RH and no separate averaging pass over pct_exceeded is required.
temp_pct <- data.frame(
  DB = df$DB,
  RH = avg_rh,
  pct_exceeded = (hours_exceeded / total_hours) * 100,
  RH_pct_exceeded = avg_rh
)

# Step 6: Sort "temp_pct" by "pct_exceeded", smallest percentage first (i.e.
# the highest DB values come first).
temp_pct <- temp_pct[order(temp_pct$pct_exceeded), ]

# Step 7: Print the first 20 rows of the sorted table.
# print() is explicit so the preview also appears when the script is run
# through source(), which does not auto-print bare top-level expressions.
print(head(temp_pct, 20))

# Step 8: Keep one row per distinct (RH_pct_exceeded, pct_exceeded) pair.
# Despite its name, combo_list is a two-column data frame, not a list of pairs.
combo_list <- unique(temp_pct[, c("RH_pct_exceeded", "pct_exceeded")])
# Step 3: Create a new dataframe named "temp_pct" with five columns: "DB", "RH", "pct_exceeded", "RH_pct_exceeded", and "DB_pct_exceeded"