### Last Code

In [2]:
import random
import numpy as np
import pandas as pd

# Read the job table from disk
df = pd.read_parquet('job_table.parquet', engine='fastparquet')

# Keep only the resource columns, drop incomplete rows, stringify the node
# field and rescale memory by 1/1024 (MB -> GB).
# NOTE(review): req_nodes is cast to str *before* explode(), so explode() has
# nothing list-like left to expand. Every distinct node list (e.g. "[23, 24]")
# is therefore handled as one "node" by everything below -- confirm whether
# per-node rows were intended.
df = (
    df[['req_nodes', 'num_cores_alloc', 'num_gpus_alloc', 'mem_alloc']]
    .dropna()
    .assign(req_nodes=lambda jobs: jobs['req_nodes'].astype(str))
    .explode('req_nodes')
    .assign(mem_alloc=lambda jobs: jobs['mem_alloc'] / 1024)
)

# Total allocated resources per req_nodes value
node_usage = df.groupby('req_nodes').sum()
nodes = node_usage.index.tolist()

# idxmax()/idxmin() return one label per column; the first entry belongs to
# the first column (num_cores_alloc), so that column decides both picks.
strongest_node = node_usage.idxmax().iloc[0]
weakest_node = node_usage.idxmin().iloc[0]

# The totals recorded for the strongest node are used as the capacity limits
max_cores = node_usage.at[strongest_node, 'num_cores_alloc']
max_gpus = node_usage.at[strongest_node, 'num_gpus_alloc']
max_mem = node_usage.at[strongest_node, 'mem_alloc']  # GB after the rescale above

# Candidate jobs: everything currently assigned to the weakest node
weak_node_jobs = df[df['req_nodes'].eq(weakest_node)]

# Genetic Algorithm settings
POPULATION_SIZE, GENERATIONS, MUTATION_RATE = 50, 100, 0.2

# Fitness function - Determines if jobs can fit into the strongest node
def fitness(individual, max_job_mem_gb=256):
    """Score a candidate selection of weak-node jobs to move to the strongest node.

    Reads the notebook globals ``weak_node_jobs`` (one row per candidate job),
    ``max_cores``, ``max_gpus`` and ``max_mem`` (capacity limits).

    Parameters
    ----------
    individual : sequence of 0/1
        One entry per row of ``weak_node_jobs``; 1 means "move this job".
    max_job_mem_gb : float, default 256
        Per-job memory ceiling in GB. A selection containing any job above
        this ceiling is rejected outright.

    Returns
    -------
    number
        ``total_cores + total_gpus + total_mem`` of the selected jobs when the
        selection is valid, otherwise 0.
    """
    # Positional lookup of the selected rows (robust to a non-unique index).
    picked = weak_node_jobs.iloc[np.flatnonzero(np.asarray(individual) == 1)]

    # This per-job check used to sit after an unconditional return and never
    # ran; it has to come before the capacity test to have any effect.
    if (picked['mem_alloc'] > max_job_mem_gb).any():
        return 0  # Jobs that are too large to fit alone cannot be moved

    total_cores = picked['num_cores_alloc'].sum()
    total_gpus = picked['num_gpus_alloc'].sum()
    total_mem = picked['mem_alloc'].sum()

    # Valid only if the combined demand fits within every limit.
    if total_cores <= max_cores and total_gpus <= max_gpus and total_mem <= max_mem:
        return total_cores + total_gpus + total_mem  # Maximize resource utilization
    return 0  # Invalid solution


# Generate initial population (binary representation: 1 = move job, 0 = keep job in weak node)
n_jobs = len(weak_node_jobs)
population = [np.random.randint(2, size=n_jobs) for _ in range(POPULATION_SIZE)]

# Evolution loop.
# With no candidate jobs there is nothing to evolve, and random.randint(0, -1)
# below would raise ValueError, so the loop is skipped in that case.
for generation in range(GENERATIONS if n_jobs > 0 else 0):
    # Calculate fitness for each individual
    fitness_scores = [fitness(ind) for ind in population]

    # Select top individuals. Rank on the score alone: sorting the raw
    # (score, array) tuples makes Python compare the NumPy arrays whenever two
    # scores tie, which raises "truth value of an array ... is ambiguous" for
    # individuals longer than one bit.
    sorted_population = [
        ind
        for _, ind in sorted(
            zip(fitness_scores, population), key=lambda pair: pair[0], reverse=True
        )
    ]
    population = sorted_population[:POPULATION_SIZE // 2]  # Keep the best half

    # Crossover - single-point recombination of two randomly drawn parents
    for _ in range(POPULATION_SIZE // 2):
        parent1, parent2 = random.choices(population, k=2)
        crossover_point = random.randint(0, n_jobs - 1)
        child = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        population.append(child)

    # Mutation - flip one random bit with probability MUTATION_RATE
    for individual in population:
        if random.random() < MUTATION_RATE:
            mutate_index = random.randint(0, n_jobs - 1)
            individual[mutate_index] = 1 - individual[mutate_index]  # Flip 0 to 1 or 1 to 0

# Select the best solution
best_solution = max(population, key=fitness)

# Apply job movement based on the best solution.
# fitness() returns 0 for selections that do not fit, so a best score of 0
# means no usable selection was found and nothing must be moved.
if fitness(best_solution) > 0:
    moved_labels = weak_node_jobs.index[np.asarray(best_solution) == 1]
    df.loc[df.index.isin(moved_labels), 'req_nodes'] = strongest_node

# Check if weakest node is now empty (no further filtering is needed then:
# there are no rows left that reference it)
if not (df['req_nodes'] == weakest_node).any():
    print(f"Weakest node {weakest_node} is now empty and can be shut down.")

# Display final allocation
print(df[['req_nodes', 'num_cores_alloc', 'num_gpus_alloc', 'mem_alloc']])

Weakest node [23, 24] is now empty and can be shut down.
       req_nodes  num_cores_alloc  num_gpus_alloc  mem_alloc
5          [763]                4               0   0.234375
6          [878]                4               0   0.234375
8           [94]                4               0   0.234375
9          [773]                4               0   0.234375
13         [206]                4               0   0.234375
...          ...              ...             ...        ...
231170     [973]              128               4   0.231445
231177     [970]              128               4   0.231445
231208      [39]              128               4   0.231445
231231     [229]              128               4   0.231445
231234     [386]              128               4   0.231445

[26296 rows x 4 columns]


### Checking the accuracy of the code step by step
The code above is broken down step by step below, with the limits and constraints added at each step so that it runs correctly and efficiently.

In [12]:
# Load the dataset
file_path = "job_table.parquet"  # Ensure the file is in the correct location

try:
    df = pd.read_parquet(file_path, engine='fastparquet')
except Exception as e:
    # Report the problem, then re-raise. Swallowing it would let the following
    # cells keep running on whatever `df` an earlier cell left in the kernel
    # (already trimmed and with mem_alloc already divided by 1024).
    print("Error:", str(e))
    raise

# Display the first few rows
print("🔹 First 5 Rows:")
print(df.head())

# Show column data types
print("\n🔹 Data Types:")
print(df.dtypes)

# Check for missing values
print("\n🔹 Missing Values:")
print(df.isnull().sum())

🔹 First 5 Rows:
                                  cores_alloc_layout  \
0  {900: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1...   
1  {687: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1...   
2  {687: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1...   
3                    {416: [0, 1, 2, 3, 4, 5, 6, 7]}   
4                    {416: [0, 1, 2, 3, 4, 5, 6, 7]}   

                                     cores_allocated  cores_per_task  \
0  {900: 128, 915: 128, 902: 128, 901: 128, 904: ...               4   
1  {687: 128, 688: 128, 681: 128, 682: 128, 680: ...               4   
2  {687: 128, 688: 128, 681: 128, 682: 128, 680: ...               4   
3                                          {416: 32}              32   
4                                          {416: 32}              32   

  derived_ec             eligible_time                  end_time  group_id  \
0        1:0 2020-05-31 22:09:29+00:00 2020-05-31 22:21:33+00:00     25200   
1        1:0 2020-05-31 22:22:08+00:00 2020-05-31 22:41

In [13]:
# Identifying Strongest and Weakest Nodes
# Select relevant columns and remove missing values
df = df[['req_nodes', 'num_cores_alloc', 'num_gpus_alloc', 'mem_alloc']].dropna()

# Convert 'req_nodes' to string and explode it.
# NOTE(review): the cast to str happens before explode(), so explode() has
# nothing list-like to expand and each distinct node list (e.g. "[23, 24]")
# is treated as a single "node" below -- confirm whether that is intended.
df['req_nodes'] = df['req_nodes'].astype(str)
df = df.explode('req_nodes')

# Convert memory from MB to GB.
# NOTE(review): this cell overwrites `df`, so running it twice without
# re-running the load cell divides mem_alloc by 1024 a second time.
df['mem_alloc'] = df['mem_alloc'] / 1024

# Group by node and calculate total resource usage
node_usage = df.groupby('req_nodes').sum()
nodes = node_usage.index.tolist()

# Identify the strongest and weakest nodes. The ranking is decided by total
# allocated cores (the first usage column), stated explicitly here.
strongest_node = node_usage['num_cores_alloc'].idxmax()
weakest_node = node_usage['num_cores_alloc'].idxmin()

# Print results
print(f"🔹 Strongest Node: {strongest_node}")
print(f"🔹 Weakest Node: {weakest_node}")

print("\n🔹 Node Resource Usage:")
print(node_usage)

🔹 Strongest Node: [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
🔹 Weakest Node: [23, 24]

🔹 Node Resource Usage:
                                                    num_cores_alloc  \
req_nodes                                                             
[0]                                                             400   
[100, 101, 102, 103, 104, 105, 106, 107, 108, 1...             8192   
[100]                                                          2972   
[101, 102, 103]                                                  96   
[101]                                                          4084   
...                                                             ...   
[97]                                                            788   
[98]                                                           1008   
[99]                                                            384   
[9]                                                             272   
[]     

In [14]:
# Genetic Algorithm settings
POPULATION_SIZE, GENERATIONS, MUTATION_RATE = 50, 100, 0.2

# Capacity limits: the usage totals recorded for the strongest node
max_cores = node_usage.at[strongest_node, 'num_cores_alloc']
max_gpus = node_usage.at[strongest_node, 'num_gpus_alloc']
max_mem = node_usage.at[strongest_node, 'mem_alloc']

# Candidate jobs: every row currently assigned to the weakest node
weak_node_jobs = df.loc[df['req_nodes'].eq(weakest_node)]

# Fitness function to evaluate individuals
def fitness(individual):
    """Score a 0/1 selection of weak-node jobs against the strongest node's limits.

    Uses the notebook globals ``weak_node_jobs``, ``max_cores``, ``max_gpus``
    and ``max_mem``. Returns 0 as soon as a selected job exceeds any limit on
    its own, or when the combined demand exceeds any limit; otherwise returns
    the sum of the selected jobs' cores, GPUs and memory.
    """
    limits = (
        ('num_cores_alloc', max_cores),
        ('num_gpus_alloc', max_gpus),
        ('mem_alloc', max_mem),
    )
    totals = [0, 0, 0]  # running cores, GPUs, memory in the same order as `limits`

    for position, gene in enumerate(individual):
        if gene != 1:
            continue  # job stays on the weak node

        job = weak_node_jobs.iloc[position]

        # A job that breaks a limit by itself invalidates the whole selection
        if any(job[column] > cap for column, cap in limits):
            return 0

        for slot, (column, _) in enumerate(limits):
            totals[slot] += job[column]

    # The combined demand must fit every limit for the selection to count
    within_capacity = all(total <= cap for total, (_, cap) in zip(totals, limits))
    return sum(totals) if within_capacity else 0

# Generate initial population: one random 0/1 vector per individual, one bit per candidate job
population = [np.random.randint(2, size=len(weak_node_jobs)) for _ in range(POPULATION_SIZE)]

# Calculate fitness for the first generation
fitness_scores = [fitness(ind) for ind in population]

# Print the best initial solution score (the label emoji was mis-encoded before)
best_initial_solution = max(fitness_scores)
print(f"🔹 Best Initial Solution Score: {best_initial_solution}")

🔹 Best Initial Solution Score: 16.013671875


In [15]:
# Apply job movement.
# `best_solution` is produced by the genetic-algorithm cell at the top of the
# notebook, not by the step-by-step cells. Guard against a stale vector whose
# length no longer matches the current candidate jobs.
if len(best_solution) != len(weak_node_jobs):
    raise ValueError(
        "best_solution does not match weak_node_jobs; "
        "re-run the genetic algorithm cell before applying job movement."
    )

for i, move in enumerate(best_solution):
    if move == 1:
        job = weak_node_jobs.iloc[i]

        # **Final Safety Check Before Moving**
        if job['num_cores_alloc'] > max_cores or job['num_gpus_alloc'] > max_gpus or job['mem_alloc'] > max_mem:
            print(f"Job {i} exceeds limits! Skipping this job.")
            continue  # Skip jobs that exceed the node's capacity

        df.loc[df.index == weak_node_jobs.index[i], 'req_nodes'] = strongest_node

# Check if the weakest node is now empty (no further filtering is needed then:
# there are no rows left that reference it)
if not (df['req_nodes'] == weakest_node).any():
    print(f" Weakest node {weakest_node} is now empty and can be shut down.")

# Display updated job allocation
print("\n🔹 Updated Job Allocation:")
print(df[['req_nodes', 'num_cores_alloc', 'num_gpus_alloc', 'mem_alloc']])

 Weakest node [23, 24] is now empty and can be shut down.

🔹 Updated Job Allocation:
       req_nodes  num_cores_alloc  num_gpus_alloc  mem_alloc
5          [763]                4               0   0.234375
6          [878]                4               0   0.234375
8           [94]                4               0   0.234375
9          [773]                4               0   0.234375
13         [206]                4               0   0.234375
...          ...              ...             ...        ...
231170     [973]              128               4   0.231445
231177     [970]              128               4   0.231445
231208      [39]              128               4   0.231445
231231     [229]              128               4   0.231445
231234     [386]              128               4   0.231445

[26296 rows x 4 columns]
