# Step 1: Load the weather observations into the dataframe "df"
#
# The file "dataWEEK.csv" is read with ten explicit column names. Note that
# "WIND-DIR" is not a syntactic R name; with read.csv's default
# check.names = TRUE it is stored as "WIND.DIR".
df <- read.csv(
  "dataWEEK.csv",
  col.names = c(
    "DateTime", "DB", "RH", "DP", "P",
    "WIND", "WIND-DIR", "SUN", "RAIN", "START"
  )
)

# Keep only the rows in which every column has a value.
df <- df[complete.cases(df), ]

# Rescale the raw readings.
# NOTE(review): presumably DB arrives in tenths of a degree, RH in percent and
# P in hPa -- confirm against the data source.
df[["DB"]] <- df[["DB"]] / 10
df[["RH"]] <- df[["RH"]] / 100
df[["P"]] <- df[["P"]] * 100
# The dataframe "df" holds the hourly data; its columns include "DateTime" (hourly timestamps), "DB" (hourly dry bulb temperature readings), and "RH" (hourly relative humidity readings).
# The total number of hours is not computed separately: it is taken as nrow(df) in Step 3 below.

# Step 2: Create a new dataframe named "temp_pct" with three columns:
# "temperature", "pct_exceeded", and "RH_pct_exceeded"
#
# "temp_pct" has one row per distinct dry bulb temperature. The readings live
# in df$DB; "temperature" is only the column name used inside "temp_pct".
stopifnot(
  is.data.frame(df),
  all(c("DB", "RH") %in% names(df)),
  nrow(df) > 0
)

thresholds <- unique(df$DB)
n_hours <- nrow(df)

temp_pct <- data.frame(
  temperature = thresholds,
  pct_exceeded = numeric(length(thresholds)),
  RH_pct_exceeded = numeric(length(thresholds))
)

# Step 3: For each distinct temperature, calculate the percentage of rows
# (hours) whose temperature is strictly greater than it.
# The share is rows / total * 100, so values run from 0 to just under 100.
temp_pct$pct_exceeded <- vapply(
  thresholds,
  function(threshold) sum(df$DB > threshold) / n_hours * 100,
  numeric(1)
)

# Step 4: For each distinct temperature, calculate the average relative
# humidity over the rows in which that temperature is exceeded.
# The highest temperature is never exceeded, so its average is left as NA
# instead of the NaN that mean() returns for an empty selection.
temp_pct$RH_pct_exceeded <- vapply(
  thresholds,
  function(threshold) {
    exceeded <- df$DB > threshold
    if (any(exceeded)) mean(df$RH[exceeded]) else NA_real_
  },
  numeric(1)
)

# Step 5: Group by "pct_exceeded" rounded to the nearest 0.5 percent and
# average the per-temperature humidity values that fall in each group.
# aggregate()'s formula method drops NA rows by default (na.action = na.omit),
# so the never-exceeded temperature does not contribute to any group.
binned <- data.frame(
  pct_exceeded = round(temp_pct$pct_exceeded * 2) / 2,
  RH_pct_exceeded = temp_pct$RH_pct_exceeded
)
temp_pct_grouped <- aggregate(
  RH_pct_exceeded ~ pct_exceeded,
  data = binned,
  FUN = mean
)

# Step 6: Order the "temp_pct_grouped" dataframe by the "pct_exceeded" column,
# with lowest percentage first
temp_pct_grouped <- temp_pct_grouped[order(temp_pct_grouped$pct_exceeded), ]

# Step 7: Print the first 20 rows of the "temp_pct_grouped" dataframe
# print() is explicit so the rows are also shown when the script is source()d.
print(head(temp_pct_grouped, 20))

# Step 8: Create a list of unique combinations of "RH_pct_exceeded" and
# "pct_exceeded"
combo_list <- unique(temp_pct_grouped[, c("RH_pct_exceeded", "pct_exceeded")])
# Step 2: Create a new dataframe named "temp_pct" with three columns: "temperature", "pct_exceeded", and "RH_pct_exceeded"