数据流的中位数
Problem
中位数是有序列表中间的数。如果列表长度是偶数,中位数则是中间两个数的平均值。
例如,
[2,3,4] 的中位数是 3
[2,3] 的中位数是 (2 + 3) / 2 = 2.5
设计一个支持以下两种操作的数据结构:
void addNum(int num) // 从数据流中添加一个整数到数据结构中。
double findMedian() // 返回目前所有元素的中位数。
Solution-1 维护有序序列
可以直接维护一个单调的序列,链表或者数组都可以,两者实现方法不同但是时间复杂度一样,这里给出链表的实现
struct IncListNode {
int val;
IncListNode* next;
IncListNode* prev;
IncListNode(): val(0), next(nullptr), prev(nullptr) {}
IncListNode(int value, IncListNode* nextNode=nullptr, IncListNode* prevNode=nullptr)
: val(value), next(nextNode), prev(prevNode) {}
};
class IncreasingList {
private:
IncListNode* head;
IncListNode* tail;
int numElem;
IncListNode* medianPtr;
bool even() {
return (numElem % 2) == 0;
}
public:
IncreasingList(): numElem(0) {
head = new IncListNode();
tail = new IncListNode();
head->next = tail;
tail->prev = head;
medianPtr = head;
}
~IncreasingList() {
IncListNode* ptr = head;
while (ptr) {
IncListNode* tempPtr = ptr;
ptr = ptr->next;
delete tempPtr;
}
}
/* 1. even, not cross: do nothing
2. even, cross: ptr = ptr->next
3. odd, not cross: ptr = ptr->prev
4. odd, cross: do nothing
*/
void insert(int n) {
IncListNode* newNode = new IncListNode(n);
IncListNode* ptr = head->next;
bool cross = false;
while(ptr != tail) {
if (n > ptr->val) {
if (ptr == medianPtr) cross = true;
ptr = ptr->next;
}
else if (n <= ptr->val) {
ptr->prev->next = newNode;
newNode->prev = ptr->prev;
ptr->prev = newNode;
newNode->next = ptr;
break;
}
}
if (ptr == tail) {
cross = true;
ptr = tail->prev;
tail->prev = newNode;
newNode->next = tail;
ptr->next = newNode;
newNode->prev = ptr;
}
if (even() && cross) {
medianPtr = medianPtr->next;
}
else if (!even() && !cross) {
medianPtr = medianPtr->prev;
}
numElem++;
}
double median() {
if (even())
return static_cast<double>(medianPtr->val + medianPtr->next->val) / 2;
else
return static_cast<double>(medianPtr->val);
}
void print() {
IncListNode* ptr = head->next;
cout << "head -> ";
while(ptr != tail) {
cout << ptr->val << " -> ";
ptr = ptr->next;
}
cout << "tail; medPtr: " << medianPtr->val << endl;
}
};
class MedianFinder {
private:
IncreasingList* list;
public:
/** initialize your data structure here. */
MedianFinder() {
list = new IncreasingList();
}
~MedianFinder() {
delete list;
}
void addNum(int num) {
list->insert(num);
//list->print();
}
double findMedian() {
return list->median();
}
};
/**
* Your MedianFinder object will be instantiated and called as such:
* MedianFinder* obj = new MedianFinder();
* obj->addNum(num);
* double param_2 = obj->findMedian();
*/
Solution-2 维护两个平衡的堆
可以维护两个size平衡的堆,一个大顶堆记录有序序列的前半段,另一个小顶堆记录有序序列的后半段。如果元素个数为2k+1
个的话,则大顶堆记录前k+1
个,小顶堆记录后k
个。如果是2k
的话就均分。这样计算中位数的时候就可以以常数复杂度返回,并且添加元素的复杂度是对数级别
// code-1
class MedianFinder {
priority_queue<int> lo; // max heap
priority_queue<int, vector<int>, greater<int>> hi; // min heap
public:
// Adds a number into the data structure.
void addNum(int num)
{
lo.push(num); // Add to max heap
hi.push(lo.top()); // balancing step
lo.pop();
if (lo.size() < hi.size()) { // maintain size property
lo.push(hi.top());
hi.pop();
}
}
// Returns the median of current data stream
double findMedian()
{
return lo.size() > hi.size()
? (double) lo.top() : (lo.top() + hi.top()) * 0.5;
}
};
// code-2
class MedianFinder {
private:
priority_queue<int> low;
priority_queue<int, vector<int>, std::greater<int>> high;
public:
void addNum(int num) {
low.push(num);
if (low.size() > high.size() + 1) {
high.push(low.top());
low.pop();
}
if (!low.empty() && !high.empty()) {
while (low.top() > high.top()) {
int temp = low.top();
low.pop();
low.push(high.top());
high.pop();
high.push(temp);
}
}
}
double findMedian() {
if (low.size() > high.size()) return static_cast<double>(low.top());
else return static_cast<double>(low.top() + high.top()) / 2;
}
};