Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions control-operator/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
# Build the manager binary
FROM golang:1.25 as builder
FROM golang:1.25 AS builder
ARG TARGETOS
ARG TARGETARCH
ARG MANAGER=task-manager

WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
RUN sed -i '\,replace github.com/AliceO2Group/Control,d' go.mod
# cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download

# Copy the go source
COPY cmd/main.go cmd/main.go
COPY cmd/ cmd/
COPY api/ api/
COPY internal/controller/ internal/controller/

# Build
# GOARCH has no default value so that the binary is built for the platform of the host where the command
# was called. For example, if we call make docker-build in a local env running on Apple Silicon (M1),
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
# by leaving it empty we can ensure that the container and the binary shipped in it will have the same platform.
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager ./cmd/${MANAGER}/

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
Expand Down
82 changes: 59 additions & 23 deletions control-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ IMAGE_TAG_BASE ?= gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs

# BUNDLE_IMG defines the image:tag used for the bundle.
# You can use it as an arg. (E.g make bundle-build BUNDLE_IMG=<some-registry>/<project-name-bundle>:<tag>)
BUNDLE_IMG ?= $(IMAGE_TAG_BASE)-bundle:v$(VERSION)
BUNDLE_IMG ?= $(IMAGE_TAG_BASE)/bundle:v$(VERSION)

# BUNDLE_GEN_FLAGS are the flags passed to the operator-sdk generate bundle command
BUNDLE_GEN_FLAGS ?= -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS)
Expand All @@ -51,7 +51,8 @@ endif
OPERATOR_SDK_VERSION ?= unknown

# Image URL to use all building/pushing image targets
IMG ?= gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs/task-manager:latest
TASK_IMG ?= gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs/task-manager:latest
ENVIRONMENT_IMG ?= gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs/environment-manager:latest
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.27.1

Expand Down Expand Up @@ -97,11 +98,11 @@ help: ## Display this help.

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
# Note that the option maxDescLen=0 was added in the default scaffold in order to sort out the issue
# Too long: must have at most 262144 bytes. By using kubectl apply to create / update resources an annotation
# is created by K8s API to store the latest version of the resource ( kubectl.kubernetes.io/last-applied-configuration).
# However, it has a size limit and if the CRD is too big with so many long descriptions as this one it will cause the failure.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0 webhook paths="./..." output:crd:artifacts:config=config/crd/bases
# Note that the option maxDescLen=0 was added in the default scaffold in order to sort out the issue
# Too long: must have at most 262144 bytes. By using kubectl apply to create / update resources an annotation
# is created by K8s API to store the latest version of the resource ( kubectl.kubernetes.io/last-applied-configuration).
# However, it has a size limit and if the CRD is too big with so many long descriptions as this one it will cause the failure.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0 webhook paths="./..." output:crd:artifacts:config=config/crd/bases

# .PHONY: manifests
# manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
Expand Down Expand Up @@ -137,23 +138,42 @@ clean-proto: ## Remove generated protobuf and gRPC Go files.
##@ Build

.PHONY: build
build: manifests generate fmt vet ## Build manager binary.
go build -o bin/manager cmd/main.go
build: manifests generate fmt vet ## Build manager binaries.
go build -o bin/task-manager ./cmd/task-manager/
go build -o bin/environment-manager ./cmd/environment-manager/

.PHONY: run
run: manifests generate fmt vet ## Run a controller from your host.
go run ./cmd/main.go
# Runs the task-manager on the host with `go run`, after the manifests,
# generate, fmt and vet prerequisites. Per the README, the task-manager needs
# NODE_NAME set in the environment when it is run outside the cluster.
.PHONY: run-task
run-task: manifests generate fmt vet ## Run the task controller from your host.
go run ./cmd/task-manager/

# Runs the environment-manager on the host with `go run`, after the manifests,
# generate, fmt and vet prerequisites.
.PHONY: run-environment
run-environment: manifests generate fmt vet ## Run the environment controller from your host.
go run ./cmd/environment-manager/

# If you wish to build the manager image targeting other platforms you can use the --platform flag.
# (i.e. docker build --platform linux/arm64 ). However, you must enable docker BuildKit for it.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
.PHONY: docker-build
docker-build: test ## Build docker image with the manager.
$(CONTAINER_TOOL) build -t ${IMG} .
docker-build: docker-build-task docker-build-environment ## Build all docker images.

# Builds the task-manager image and tags it ${TASK_IMG}; the `test` target runs
# first. MANAGER=task-manager is passed as a build arg, which the Dockerfile
# uses to select the ./cmd/${MANAGER}/ package it compiles.
.PHONY: docker-build-task
docker-build-task: test ## Build docker image for the task manager.
$(CONTAINER_TOOL) build --build-arg MANAGER=task-manager -t ${TASK_IMG} .

# Builds the environment-manager image and tags it ${ENVIRONMENT_IMG}; the
# `test` target runs first. MANAGER=environment-manager is passed as a build
# arg, which the Dockerfile uses to select the ./cmd/${MANAGER}/ package it compiles.
.PHONY: docker-build-environment
docker-build-environment: test ## Build docker image for the environment manager.
$(CONTAINER_TOOL) build --build-arg MANAGER=environment-manager -t ${ENVIRONMENT_IMG} .

.PHONY: docker-push
docker-push: ## Push docker image with the manager.
$(CONTAINER_TOOL) push ${IMG}
docker-push: docker-push-task docker-push-environment ## Push all docker images.

# Pushes ${TASK_IMG} with $(CONTAINER_TOOL). The rule has no prerequisites, so
# the image must already have been built (e.g. with docker-build-task).
.PHONY: docker-push-task
docker-push-task: ## Push docker image for the task manager.
$(CONTAINER_TOOL) push ${TASK_IMG}

# Pushes ${ENVIRONMENT_IMG} with $(CONTAINER_TOOL). The rule has no
# prerequisites, so the image must already have been built
# (e.g. with docker-build-environment).
.PHONY: docker-push-environment
docker-push-environment: ## Push docker image for the environment manager.
$(CONTAINER_TOOL) push ${ENVIRONMENT_IMG}

# PLATFORMS defines the target platforms for which the manager image is built, to provide support for multiple
# architectures. (i.e. make docker-buildx IMG=myregistry/myoperator:0.0.1). To use this option you need to:
Expand Down Expand Up @@ -187,13 +207,28 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified
$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

.PHONY: deploy
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - --server-side
deploy: deploy-task deploy-environment ## Deploy both controllers to the K8s cluster specified in ~/.kube/config.

# Deploys the task controller: sets the `task-manager` image in the config/task
# kustomization to ${TASK_IMG}, then renders config/task and applies it
# server-side. The `cd` and the edit share one recipe line (joined with &&)
# because each recipe line runs in its own shell.
.PHONY: deploy-task
deploy-task: manifests kustomize ## Deploy task controller to the K8s cluster specified in ~/.kube/config.
cd config/task && $(KUSTOMIZE) edit set image task-manager=${TASK_IMG}
$(KUSTOMIZE) build config/task | $(KUBECTL) apply -f - --server-side

# Deploys the environment controller: sets the `environment-manager` image in
# the config/environment kustomization to ${ENVIRONMENT_IMG}, then renders
# config/environment and applies it server-side. The `cd` and the edit share one
# recipe line (joined with &&) because each recipe line runs in its own shell.
.PHONY: deploy-environment
deploy-environment: manifests kustomize ## Deploy environment controller to the K8s cluster specified in ~/.kube/config.
cd config/environment && $(KUSTOMIZE) edit set image environment-manager=${ENVIRONMENT_IMG}
$(KUSTOMIZE) build config/environment | $(KUBECTL) apply -f - --server-side

.PHONY: undeploy
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
undeploy: undeploy-task undeploy-environment ## Undeploy both controllers from the K8s cluster specified in ~/.kube/config.

# Renders config/task with kustomize and deletes the rendered resources from the
# cluster. $(ignore-not-found) is forwarded to kubectl's --ignore-not-found flag.
.PHONY: undeploy-task
undeploy-task: kustomize ## Undeploy task controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/task | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

# Renders config/environment with kustomize and deletes the rendered resources
# from the cluster. $(ignore-not-found) is forwarded to kubectl's
# --ignore-not-found flag.
.PHONY: undeploy-environment
undeploy-environment: kustomize ## Undeploy environment controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/environment | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

##@ Build Dependencies

Expand Down Expand Up @@ -252,7 +287,8 @@ endif
.PHONY: bundle
bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files.
$(OPERATOR_SDK) generate kustomize manifests -q
cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG)
cd config/manager && $(KUSTOMIZE) edit set image task-manager=$(TASK_IMG)
cd config/manager && $(KUSTOMIZE) edit set image environment-manager=$(ENVIRONMENT_IMG)
$(KUSTOMIZE) build config/manifests | $(OPERATOR_SDK) generate bundle $(BUNDLE_GEN_FLAGS)
$(OPERATOR_SDK) bundle validate ./bundle

Expand Down Expand Up @@ -286,7 +322,7 @@ endif
BUNDLE_IMGS ?= $(BUNDLE_IMG)

# The image tag given to the resulting catalog image (e.g. make catalog-build CATALOG_IMG=example.com/operator-catalog:v0.2.0).
CATALOG_IMG ?= $(IMAGE_TAG_BASE)-catalog:v$(VERSION)
CATALOG_IMG ?= $(IMAGE_TAG_BASE)/catalog:v$(VERSION)

# Set CATALOG_BASE_IMG to an existing catalog image tag to add $BUNDLE_IMGS to that image.
ifneq ($(origin CATALOG_BASE_IMG), undefined)
Expand Down
17 changes: 17 additions & 0 deletions control-operator/PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,21 @@ resources:
kind: Task
path: github.com/AliceO2Group/Control/operator/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: alice.cern
group: aliecs
kind: Environment
path: github.com/AliceO2Group/Control/operator/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
domain: alice.cern
group: aliecs
kind: TaskTemplate
path: github.com/AliceO2Group/Control/operator/api/v1alpha1
version: v1alpha1
version: "3"
14 changes: 14 additions & 0 deletions control-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ In order to deploy Task and Environment workflows to the k8s cluster you need co
controlling custom CRDs defining ALICE custom workload. This Folder defines and implements all moving parts together with Makefile
to build, deploy, install CRDs and operators.

## Architecture

The operator is split into two separate binaries with different deployment strategies:

**task-manager** runs as a DaemonSet — one pod per node. Each pod is responsible only for `Task` resources assigned to its node (matched via `spec.nodeName`). This is necessary because the task-manager communicates with OCC gRPC processes running locally on the same node via `hostNetwork`.

**environment-manager** runs as a Deployment with a single replica per cluster. It is responsible for `Environment` resources, which are namespaced but not tied to a specific node.

## Getting Started

You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) to get a local cluster for testing, or run against a remote cluster. The author had the most success with K3s ([see](/docs/kubernetes_ecs.md)).
Expand Down Expand Up @@ -78,6 +86,12 @@ make run

**NOTE:** You can also run this in one step by running: `make install run-task` (or `make install run-environment`)

**NOTE:** The task-manager requires a `NODE_NAME` environment variable to know which node it is responsible for. In-cluster this is injected automatically via the downward API. When running locally you must set it manually:

```sh
NODE_NAME=<your-node-name> make run-task
```

### Modifying the API definitions

If you are editing the API definitions, generate the manifests such as CRs or CRDs using:
Expand Down
131 changes: 131 additions & 0 deletions control-operator/api/v1alpha1/environment_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* === This file is part of ALICE O² ===
*
* Copyright 2026 CERN and copyright holders of ALICE O².
* Author: Michal Tichak <michal.tichak@cern.ch>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* In applying this license CERN does not waive the privileges and
* immunities granted to it by virtue of its status as an
* Intergovernmental Organization or submit itself to any jurisdiction.
*/

package v1alpha1

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// TaskDefinition describes a single task inline, as part of an Environment
// spec. It pairs a plain name with a TaskTemplateSpec; in contrast to a
// TaskTemplate it is not stored as a resource of its own in the cluster.
type TaskDefinition struct {
	// Name is the plain name given to this task.
	Name string `json:"name"`

	// Spec carries the same spec fields a TaskTemplate has.
	Spec TaskTemplateSpec `json:"spec"`
}

// EnvironmentSpec is the desired state of an Environment: the inline task
// definitions it consists of and the state it is requested to be in.
//
// Run "make" to regenerate code after modifying this type. The validation
// markers used here are described at
// https://book.kubebuilder.io/reference/markers/crd-validation.html
type EnvironmentSpec struct {
	// Tasks maps a string key to the list of inline task definitions filed
	// under that key.
	// NOTE(review): what the key stands for is not visible in this file — confirm.
	Tasks map[string][]TaskDefinition `json:"tasks"`

	// State is the requested state, restricted to the values enumerated in
	// the marker below.
	// +kubebuilder:validation:Enum=standby;deployed;configured;running
	State string `json:"state"`
}

// EnvironmentStatus defines the observed state of Environment.
//
// Every field is optional, so a partially populated status (for example one
// written before any task has been observed) still serializes to a valid
// object.
//
// Important: Run "make" to regenerate code after modifying this file
type EnvironmentStatus struct {
	// For Kubernetes API conventions, see:
	// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties
	// conditions represent the current state of the Environment resource.
	// Each condition has a unique type and reflects the status of a specific aspect of the resource.
	//
	// Standard condition types include:
	// - "Available": the resource is fully functional
	// - "Progressing": the resource is being created or updated
	// - "Degraded": the resource failed to reach or maintain its desired state
	//
	// TODO: use conditions properly during deployment
	//
	// The status of each condition is one of True, False, or Unknown.
	// +listType=map
	// +listMapKey=type
	// +optional
	Conditions []metav1.Condition `json:"conditions,omitempty"`

	// tasks is a two-level string map holding observed per-task information.
	// NOTE(review): the meaning of the outer and inner keys is not visible in
	// this file — confirm against the controller that fills it in.
	//
	// The field is optional and omitted when empty: without omitempty a nil
	// map is serialized as "tasks": null, and without the optional marker the
	// generated CRD schema lists "tasks" as a required status property.
	// +optional
	Tasks map[string]map[string]string `json:"tasks,omitempty"`

	// state is the observed state of the Environment; it is shown in the
	// "Actual" printer column.
	// +optional
	State string `json:"state,omitempty"`
}

// TaskReference names a task and carries environment variables, command-line
// arguments and a string-to-string argument map for it.
// NOTE(review): presumably Name refers to a TaskTemplate stored in the
// cluster — confirm.
// NOTE(review): none of the JSON tags use omitempty and no field is marked
// optional, so all four keys are required in the generated schema — confirm
// that is intended.
type TaskReference struct {
	// Name identifies the referenced task.
	Name string `json:"name"`

	// Env lists environment variables using the core/v1 EnvVar type.
	Env []v1.EnvVar `json:"env"`

	// ArgsCLI lists command-line arguments.
	ArgsCLI []string `json:"argsCLI"`

	// ArgsTransition holds arguments as a string-to-string map
	// (presumably applied on state transitions — confirm).
	ArgsTransition map[string]string `json:"argsTransition"`
}

// TemplateSpecification groups task references under string keys. It is the
// type of the Environment.TaskTemplates field.
type TemplateSpecification struct {
	// Tasks maps a string key to the task references filed under that key.
	// NOTE(review): what the key stands for is not visible in this file — confirm.
	Tasks map[string][]TaskReference `json:"tasks"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Desired",type="string",JSONPath=".spec.state"
// +kubebuilder:printcolumn:name="Actual",type="string",JSONPath=".status.state"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"

// Environment is the Schema for the environments API
type Environment struct {
	metav1.TypeMeta `json:",inline"`

	// metadata is a standard object metadata
	// +optional
	metav1.ObjectMeta `json:"metadata,omitzero"`

	// taskTemplates defines templates stored in cluster to be used
	// for task creation, meant for more common tasks
	//
	// The field is optional, so it is tagged omitzero like metadata and
	// status: an unset value is then left out of the serialized object
	// instead of being written as {"tasks": null}.
	// +optional
	TaskTemplates TemplateSpecification `json:"taskTemplates,omitzero"`

	// spec defines the desired state of Environment
	// +required
	Spec EnvironmentSpec `json:"spec"`

	// status defines the observed state of Environment
	// +optional
	Status EnvironmentStatus `json:"status,omitzero"`
}

// +kubebuilder:object:root=true

// EnvironmentList is the list type for Environment: standard type and list
// metadata followed by the Environment objects themselves.
type EnvironmentList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitzero"`

	// Items holds the listed Environment objects.
	Items []Environment `json:"items"`
}

// init registers the Environment kind and its list type with this package's
// SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		&Environment{},
		&EnvironmentList{},
	)
}
3 changes: 2 additions & 1 deletion control-operator/api/v1alpha1/task_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ type TaskSpec struct {
Properties map[string]string `json:"properties,omitempty"`
Arguments map[string]string `json:"arguments,omitempty"`
// +kubebuilder:validation:Enum=standby;deployed;configured;running
State string `json:"state,omitempty"` // this is the *requested* state, there are other states the task may end up in but cannot be requested
State string `json:"state,omitempty"` // this is the *requested* state, there are other states the task may end up in but cannot be requested
NodeName string `json:"nodeName,omitempty"`
}

// TaskStatus defines the observed state of Task
Expand Down
Loading
Loading