# Step 1: Create a dataframe with hourly temperature data for a full year

# The dataframe is named "df". The columns used below are "DateTime" (hourly timestamps), "DB" (hourly dry bulb temperature readings) and "RH" (hourly relative humidity readings).
# Read "dataWEEK.csv", naming its ten columns explicitly.
df <- read.csv(
  "dataWEEK.csv",
  col.names = c(
    "DateTime", "DB", "RH", "DP", "P",
    "WIND", "WIND-DIR", "SUN", "RAIN", "START"
  )
)

# Keep only the rows that have no missing value in any column.
df <- df[complete.cases(df), ]

# Rescale the raw readings: DB / 10, RH / 100, P * 100.
df[["DB"]] <- df[["DB"]] / 10
df[["RH"]] <- df[["RH"]] / 100
df[["P"]] <- df[["P"]] * 100
# At this point "df" holds only complete rows, with "DB", "RH" and "P" rescaled above.
# The analysis below uses two of its columns: "DB" (hourly dry bulb temperature readings) and "RH" (hourly relative humidity readings).
# Step 2: Count the hourly records in "df"; this count is the denominator for the exceedance fractions computed below
# Number of rows in "df".
total_hours <- nrow(df)
print(total_hours)

# Number of distinct (DB, RH) pairs: count the rows that are not a repeat of
# an earlier pair.
sum(!duplicated(df[, c("DB", "RH")]))
# Step 3: Build "temp_pct" with five columns: "DB", "RH", "pct_exceeded",
# "RH_pct_exceeded" and "DB_pct_exceeded".
# Step 4: Give every row of "df" exactly one row in "temp_pct" holding its
# temperature, its relative humidity and the fraction of rows whose
# temperature is strictly higher.
#
# This is built in a single vectorised call instead of rbind() inside a loop:
#   * appending every row that shares a temperature on each iteration gave a
#     temperature seen k times k^2 rows instead of k;
#   * rbind() drops zero-row data frames, so a zero-row template loses the two
#     "*_pct_exceeded" columns on the first append;
#   * growing a data frame row by row is quadratic in the number of rows.

# rank(ties.method = "max") is the number of readings <= each value, so
# subtracting it from the number of readings gives the count strictly above
# it, i.e. sum(df$DB > temp) for every row at once.
hours_exceeded <- length(df$DB) - rank(df$DB, ties.method = "max")

# Placeholder for the two average columns, which start out as NA.
# rep() keeps this valid when "df" has zero rows.
not_yet_computed <- rep(NA_real_, length(hours_exceeded))

temp_pct <- data.frame(
  DB = df$DB,
  RH = df$RH,
  pct_exceeded = hours_exceeded / total_hours,
  RH_pct_exceeded = not_yet_computed,
  DB_pct_exceeded = not_yet_computed
)
nrow(temp_pct)

# Step 5: For every distinct "pct_exceeded" value, compute the mean "RH" and
# mean "DB" of the rows sharing that value, and store them on each of those
# rows in "RH_pct_exceeded" and "DB_pct_exceeded".
#
# Whole columns are assigned, so this works whether or not the two target
# columns already exist in "temp_pct". Element-wise assignment into a missing
# column either errors or is silently recycled.

# Rows with the same "pct_exceeded" get the same group index. match() compares
# the doubles exactly; using the numeric column directly as a grouping factor
# would first convert the values to text.
pct_group <- match(temp_pct$pct_exceeded, unique(temp_pct$pct_exceeded))

# ave() (default FUN = mean) returns the group mean aligned to every row.
temp_pct$RH_pct_exceeded <- ave(temp_pct$RH, pct_group)
temp_pct$DB_pct_exceeded <- ave(temp_pct$DB, pct_group)

nrow(temp_pct)
# Step 6: Sort "temp_pct" by ascending "pct_exceeded"
temp_pct <- temp_pct[order(temp_pct[["pct_exceeded"]]), ]

# Step 7: Show the first 150 rows of the sorted "temp_pct"
head(temp_pct, n = 150)

# Distinct (RH_pct_exceeded, DB_pct_exceeded, pct_exceeded) combinations;
# show the first 100 of them.
combo_list <- unique(
  temp_pct[c("RH_pct_exceeded", "DB_pct_exceeded", "pct_exceeded")]
)
head(combo_list, n = 100)