用Python解决数据结构与算法问题
Mar 14 2017发现一本Python的好书被翻译了,利用下班时间学习了一下,把相关代码都实现了一遍,包括:
- 栈,队列,双端队列
- 无序链表,有序链表
- 二叉树,堆,二叉搜索树,AVL树
- 图
以及一些算法
# coding: utf-8
u"""
线性数据结构, 栈, 队列, deques, 容器结构, 数据项之间存在相对的位置
"""
class Stack(object):
u"""
栈 先进后出
"""
def __init__(self):
self.items = []
def push(self, item):
self.items.append(item) # O(1)
def pop(self):
return self.items.pop() # O(1)
def peek(self):
return self.items[-1]
def isEmpty(self):
return self.items == []
def size(self):
return len(self.items)
u"""
全括号匹配
((()))
()(())()
"""
def parChecker(symbolString):
stack = Stack()
index = 0
while index < len(symbolString):
sym = symbolString[index]
if sym == '(':
stack.push(sym)
else:
if stack.isEmpty():
return False
else:
stack.pop()
index += 1
if stack.isEmpty():
return True
return False
u"""
代码括号匹配
"""
def codeChecker(symbolString):
stack = Stack()
index = 0
while index < len(symbolString):
sym = symbolString[index]
if sym in '([{':
stack.push(sym)
elif sym in ')]}':
if stack.isEmpty():
return False
else:
if not matches(stack.pop(), sym):
return False
index += 1
if stack.isEmpty():
return True
return False
def matches(open, close):
opens = '([{'
closes = ')]}'
return opens.index(open) == closes.index(close)
u"""
任意进制转换
"""
def baseConverter(decNumber, base):
digits = '0123456789ABCDEF'
stack = Stack()
while decNumber > 0:
stack.push(digits[decNumber % base])
decNumber = decNumber // base
string = ''
while not stack.isEmpty():
string += stack.pop()
return string
class Queue(object):
u"""
队列 先进先出
"""
def __init(self):
self.items = []
def isEmpty(self):
return self.items == []
def size(self):
return len(self.items)
def enqueue(self, item):
self.items.insert(0, item) # O(n)
def dequeue(self):
return self.items.pop() # O(1)
u"""
烫手山芋 循环传递n次 淘汰一个 直到最后幸存一个
"""
def hotPotato(namelist, num):
queue = Queue()
for name in namelist:
queue.enqueue(name)
while queue.size() > 1:
for i in range(num):
queue.enqueue(queue.dequeue())
queue.dequeue()
return queue.dequeue()
class Deque(object):
u"""
双端队列 可以同时从2端 进出
"""
def __init__(self):
self.items = []
def isEmpty(self):
return self.items == []
def size(self):
return len(self.items)
def addFront(self, item):
self.items.insert(0, item) # O(n)
def removeFront(self):
return self.items.pop(0) # O(n)
def addRear(self, item):
self.items.append(item) # O(1)
def removeRear(self):
return self.items.pop() # O(1)
u"""
回文检查 字符串前后一致
"""
def palchecker(aString):
deque = Deque()
for i in aString:
deque.addRear(i)
while deque.size() > 1:
if deque.removeFront() != deque.removeRear():
return False
return True
u"""
链表 节点包含数据本身,并保存指向下一个节点的指针
"""
class Node(object):
def __init__(self, initdata):
self.data = initdata
self.next = None
def getData(self):
return self.data
def getNext(self):
return self.next
def setData(self, data):
self.data = data
def setNext(self, node):
self.next = node
u"""
无序链表
"""
class UnorderedList(object):
def __init__(self):
self.head = None
def isEmpty(self): # O(1)
return self.head == None
def add(self, item): # O(1)
node = Node(item)
node.setNext(self.head)
self.head = node
def size(self): # 遍历链 O(n)
count = 0
current = self.head
while current:
count += 1
current = current.getNext()
return count
def search(self, item): # O(n)
current = self.head
while current:
if current.getData() == item:
return True
current = current.getNext()
return False
def remove(self, item): # O(n)
previous = None
current = self.head
while current:
if current.getData() == item:
if previous is None:
self.head = current.getNext()
else:
previous.setNext(current.getNext())
return
previous = current
current = current.getNext()
def append(self, item): # O(n)
node = Node(item)
if self.head is None:
self.head = node
return
current = self.head
while current.getNext():
current = current.getNext()
current.setNext(node)
def index(self, item): # O(n)
count = 0
current = self.head
while current:
if current.getData() == item:
return count
current = current.getNext()
count += 1
def insert(self, pos, item): # O(k)
node = Node(item)
current = self.head
previous = None
for _ in range(pos):
previous = current
current = current.getNext()
node.setNext(current)
if previous is None:
self.head = node
else:
previous.setNext(node)
def pop(self, pos=None): # O(n)
current = self.head
previous = None
if pos is None:
while current.getNext():
previous = current
current = current.getNext()
else:
for _ in range(pos):
previous = current
current = current.getNext()
if previous is None:
self.head = current.getNext()
else:
previous.setNext(current.getNext())
return current.getData()
u"""
有序链表
"""
class OrderedList(UnorderedList):
def add(self, item): # O(n)
previous = None
current = self.head
while current:
if current.getData() > item:
break
previous = current
current = current.getNext()
node = Node(item)
node.setNext(current)
if previous is None:
self.head = node
else:
previous.setNext(node)
def search(self, item): # O(n)
current = self.head
while current:
if current.getData() == item:
return True
elif current.getData() > item:
break
current = current.getNext()
return False
u"""
递归
1. 递归算法必须具有基本情况
2. 递归算法必须改变状态并靠近其基本情况
3. 递归算法必须递归方式调用自身
"""
u"""
递归算法 整数转字符串
"""
def toStr(n, base):
digits = '0123456789ABCDEF'
if n < base:
return digits[n]
else:
return toStr(n // base, base) + digits[n % base]
u"""
河内塔
1. 把上层的所有盘子移动到中间
2. 把最后一个移动到目标
3. 把中间的所有盘子移动到目标
"""
def moveTower(height, fromPole, toPole, withPole):
if height >= 1:
moveTower(height-1, fromPole, withPole, toPole)
moveDisk(fromPole, toPole)
moveTower(height-1, withPole, toPole, fromPole)
def moveDisk(fp,tp):
print 'move disk from ' + fp + ' to ' + tp
u"""
探索迷宫
1. 向北边走一格, 递归
2. 如果向北不能走, 向南走一格, 递归
3. 如果向南不能走, 向东走一格, 递归
4. 如果向东不能走, 向西走一格, 递归
先走一格, 然后继续向不同方向递归, 直到找到出口
def searchFrom(maze, startRow, startColumn):
maze.updatePosition(startRow, startColumn)
# Check for base cases:
# 1. We have run into an obstacle, return false
if maze[startRow][startColumn] == OBSTACLE : # 碰到墙壁
return False
# 2. We have found a square that has already been explored
if maze[startRow][startColumn] == TRIED: # 已经来过
return False
# 3. Success, an outside edge not occupied by an obstacle
if maze.isExit(startRow,startColumn):
maze.updatePosition(startRow, startColumn, PART_OF_PATH)
return True # 判断为出口
maze.updatePosition(startRow, startColumn, TRIED) # 设置来过
# Otherwise, use logical short circuiting to try each
# direction in turn (if needed)
found = searchFrom(maze, startRow-1, startColumn) or \
searchFrom(maze, startRow+1, startColumn) or \
searchFrom(maze, startRow, startColumn-1) or \
searchFrom(maze, startRow, startColumn+1)
if found:
maze.updatePosition(startRow, startColumn, PART_OF_PATH)
else:
maze.updatePosition(startRow, startColumn, DEAD_END)
return found
"""
u"""
动态规划
硬币找零问题 找到最小可能的硬币数
"""
def recMC(coinValueList, change):
minCoins = change
if change in coinValueList:
return 1
else:
for i in [c for c in coinValueList if c <= change]:
num = 1 + recMC(coinValueList, change-i)
if num < minCoins:
minCoins = num
return minCoins
u"""
以上算法由于计算了大量的重复数据, 所以低效, 可以对已有的数据缓存, 避免重复计算
从顶至下
"""
def recDC(coinValueList, change, knownResults):
minCoins = change
if change in coinValueList:
knownResults[change] = 1
return 1
elif knownResults[change] > 0:
return knownResults[change]
else:
for i in [c for c in coinValueList if c <= change]:
num = 1 + recDC(coinValueList, change-i, knownResults)
if num < minCoins:
minCoins = num
knownResults[change] = minCoins
return minCoins
u"""
动态规划
1. 最小最优解
2. 从小到大, 使用已有的计算结果, 从底至上
"""
def dpMakeChange(coinValueList, change, minCoins, coinsUsed):
for cents in range(change+1):
coinCount = cents
newCoin = 1
for i in [c for c in coinValueList if c <= cents]:
if minCoins[cents-i] + 1 < coinCount:
coinCount = minCoins[cents-i] + 1
newCoin = i
minCoins[cents] = coinCount
coinsUsed[cents] = newCoin
return minCoins[change]
# 打印最佳组合的各个值
def printCoins(coinsUsed, change):
l = []
while change:
l.append(coinsUsed[change])
change -= coinsUsed[change]
print l
u"""
背包问题
n=5是物品的数量,c=10是书包能承受的重量,w=[2,2,6,5,4]是每个物品的重量,v=[6,3,5,4,6]是每个物品的价值
"""
def bag(n, c, w, v):
res=[[-1 for j in range(c + 1)] for i in range(n + 1)]
for j in range(c + 1):
res[0][j]=0 # 把第一列初始化 0
for i in range(1, n + 1): # 行 1~5
for j in range(1, c + 1): # 列 1~10
res[i][j] = res[i - 1][j] # 先把上一行的值赋值个当前
if j >= w[i-1] and res[i][j] < res[i-1][j - w[i-1]] + v[i - 1]:
# 如果重量大于物品的行重量且当前价值小于物品价值+前一个物品重量,则重算最大重量
res[i][j] = res[i-1][j-w[i-1]] + v[i-1]
return res
def show(n, c, w, res):
print '最大价值', res[n][c]
x = [False for _ in w]
j = c
for i in range(1, n+1):
if res[i][j] > res[i-1][j]:
x[i-1] = True
j -= w[i-1]
for i, j in enumerate(x):
if j:
print '第{}个'.format(i+1)
u"""
二分查找
"""
def binarySearch(alist, item):
first = 0
last = len(alist) - 1
found = False
while first <= last and not found:
mid = first + (last - first) // 2
if alist[mid] == item:
found = True
elif alist[mid] > item:
last = mid - 1
else:
first = mid + 1
return found
# 递归版
def binarySearch2(alist, item): # O( log^n )
if len(alist) == 0:
return False
else:
mid = len(alist) // 2
if alist[mid] == item:
return True
elif alist[mid] > item:
return binarySearch2(alist[:mid], item)
else:
return binarySearch2(alist[mid+1:], item)
u"""
哈希表
1. 大小为m的哈希表,有从0~m-1为编号的槽 可以用数组表示
2. 项与编号的映射方法叫 hash 函数,简单的示例可以用取余
* 如果多个项经过 hash 函数得到的编号相同,则称为碰撞
* 分组求和 把项的多个数字分组求和再取m的余
* 平方取中 算项的平方值并去中间的某几位 再取m的余
3. 项数/表大小m 的值 称为 负载因子
4. 冲突解决 重新散列
* 开放寻址 如果槽已被占用,则顺序往槽的下一个槽查找,直到找到空槽(线性探测)
* 使用链式存储,在同一个编号上存储多个项
"""
class HashTable(object):
def __init__(self):
self.size = 11 # 要为质数
self.slots = [None]*self.size
self.data = [None]*self.size
def _hash(self, key):
return key % self.size
def _rehash(self, oldhash):
return (oldhash+1)%self.size
def put(self, key, data):
hashvalue = self._hash(key)
if self.slots[hashvalue] is None: # 新增
self.slots[hashvalue] = key
self.data[hashvalue] = data
elif self.slots[hashvalue] == key: # 修改
self.data[hashvalue] = data
else:
nextslot = self._rehash(hashvalue)
while self.slots[nextslot] != None and self.slots[nextslot] != key:
nextslot = self._rehash(hashvalue)
if self.slots[nextslot] is None:
self.slots[nextslot] = key
self.data[nextslot] = data
def get(self, key):
position = self._get_position(key)
return self.data[position] if position is not None else None
def _get_position(self, key): # 理想的情况下 O(1) 最差 O(n)
startslot = self._hash(key)
position = startslot
while self.slots[position] != None:
if self.slots[position] == key:
return position
else:
position = self._rehash(position)
if position == startslot:
return None
return None
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, data):
self.put(key, data)
def __len__(self):
count = 0
for i in self.slots:
if i is not None:
count += 1
return count
def __delitem__(self, key):
position = self._get_position(key)
if position is not None:
self.slots[position] = None
self.data[position] = None
def __contains__(self, key):
position = self._get_position(key)
return False if position is None else True
u"""
冒泡排序
"""
def bubbleSort(alist): # O(n^2)
for passnum in range(len(alist)-1, 0, -1):
for i in range(passnum):
if alist[i] > alist[i+1]:
alist[i], alist[i+1] = alist[i+1], alist[i]
u"""
短冒泡排序
前一次遍历如果没有发生交换,证明整个序列以排序
"""
def shortBubbleSort(alist):
passnum = len(alist) - 1
exchange = True
while passnum > 0 and exchange:
exchange = False
for i in range(passnum):
if alist[i] > alist[i+1]:
exchange = True
alist[i], alist[i+1] = alist[i+1], alist[i]
passnum -= 1
u"""
选择排序
同冒泡排序,只是减少的交换次数
"""
def selectionSort(alist):
for passnum in range(len(alist)-1, 0, -1):
maxPositon = 0
for i in range(1, passnum+1):
if alist[i] > alist[maxPositon]:
maxPositon = i
alist[passnum], alist[maxPositon] = alist[maxPositon], alist[passnum]
u"""
插入排序 O(n^2)
维持最小的已排序序列,迭代未排序项,插入到合适的位置
"""
def insertionSort(alist):
for i in range(1, len(alist)):
current = alist[i]
position = i
while position > 0 and alist[position-1] > current:
alist[position] = alist[position-1]
position -= 1
alist[position] = current
u"""
shell 排序
通过gap间隔来划分子序列,先对所有的子序列进行插入排序,然后再对整个序列插入排序
O(n^3/2) 稍稍好于插入排序
"""
def shellSort(alist):
count = len(alist) // 2 # 选择gap
while count > 0:
for i in range(count):
gapInsertionSort(alist, i, count)
count = count // 2
def gapInsertionSort(alist, start, gap):
for i in range(start+gap, len(alist), gap):
current = alist[i]
position = i
while position > start and alist[position-gap] > current:
alist[position] = alist[position-gap]
position -= gap
alist[position] = current
u"""
归并排序
递归算法,分治,把序列切割成最小的部分并排序,然后逐步合并已排序的结果 O(nlog^n)
"""
def mergeSort(alist):
if len(alist) > 1:
mid = len(alist)//2 # 二分 log^n
left = alist[:mid]
right = alist[mid:]
mergeSort(left) # 递归排序,最小部分为 长度为1的列表
mergeSort(right)
# 合并已排序的片段 最大花费n次
i = j = k = 0
while i < len(left) and j < len(right):
if left[i] < right[j]:
alist[k] = left[i]
i += 1
else:
alist[k] = right[j]
j += 1
k += 1
while i < len(left):
alist[k] = left[i]
i += 1
k += 1
while j < len(right):
alist[k] = right[j]
j += 1
k += 1
u"""
快速排序 O(nlogn)
"""
def quickSort(alist):
quickSortHelper(alist,0,len(alist)-1)
def quickSortHelper(alist, first, last):
if first < last:
i, j = first, last
current = alist[first]
while i < j:
while i < j and alist[j] >= current:
j -= 1
alist[i] = alist[j]
while i < j and alist[i] <= current:
i += 1
alist[j] = alist[i]
alist[i] = current
quickSortHelper(alist,first,i-1)
quickSortHelper(alist,i+1,last)
u"""
树
节点 树的基础部分 可以有附加属性,称为有效载荷 key
边 表示节点之间的联系, 除了 根节点 其它节点都有从父节点来的边(传入边)
根 没有传入边的节点
路径 由边连接的节点的有序序列
子节点 传入边为相同父节点
父节点
兄弟
子树 由父节点 与 子孙节点组成的一组节点与边
叶节点 没有子节点的节点
层数 由根到节点之间边的条数
高度 最大的叶子节点层数
定义 树由一组节点和连接节点的边组成
- 树的一个节点指定为根节点
- 除了根,没有节点n通过边与p相连,n是p的子节点
- 从根遍历到某个节点的路径唯一
定义 树是由根与一系列子树组成,子数的根与根相连,递归定义
"""
u"""
列表表示 二叉树
"""
def binaryTree(r):
return [r, [], []]
def insertLeft(root,newBranch):
node = root.pop(1)
if node:
root.insert(1, [newBranch, node, []])
else:
root.insert(1, [newBranch, [], []])
return root
def insertRight(root,newBranch):
node = root.pop(2)
if node:
root.insert(2, [newBranch, [], node])
else:
root.insert(2, [newBranch, [], []])
return root
def getRootVal(root):
return root[0]
def setRootVal(root,newVal):
root[0] = newVal
def getLeftChild(root):
return root[1]
def getRightChild(root):
return root[2]
u"""
节点表示
"""
class BinaryTree(object):
def __init__(self, val):
self.key = val
self.left = None
self.right = None
def insertLeft(self, val):
node = BinaryTree(val)
if self.left is None:
self.left = node
else:
node.left = self.left
self.left = node
def insertRight(self, val):
node = BinaryTree(val)
if self.right is None:
self.right = node
else:
node.right = self.right
self.right = node
def getRootVal(self):
return self.key
def setRootVal(self, val):
self.key = val
def getLeftChild(self):
return self.left
def getRightChild(self):
return self.right
def preorder(self):
print self.key
if self.left:
self.left.preorder()
if self.right:
self.right.preorder()
u"""
使用树构建数值运算
( 1 + ( 4 * 3 ) )
"""
def buildParseTree(fpexp):
fplist = fpexp.split()
pstack = Stack()
etree = BinaryTree('')
pstack.push(etree)
current = etree
for i in fplist:
if i == '(':
current.insertLeft('')
pstack.push(current)
current = current.getLeftChild()
elif i not in ['+', '-', '*', '*', ')']:
current.setRootVal(int(i))
current = pstack.pop()
elif i in ['+', '-', '*', '*']:
current.setRootVal(i)
current.insertRight('')
pstack.push(current)
current = current.getRightChild()
elif i == ')':
current = pstack.pop()
print current.getRootVal()
return etree
import operator
u"""
递归算式树求值
"""
def evaluate(parseTree):
opers = {'+':operator.add, '-':operator.sub, '*':operator.mul, '/':operator.truediv}
left = parseTree.getLeftChild()
right = parseTree.getRightChild()
if left and right:
fn = opers[parseTree.getRootVal()]
return fn(evaluate(left), evaluate(right))
else:
return parseTree.getRootVal()
u"""
树的遍历
前序 返问根,递归前序遍历左子树,递归前序遍历右子树
中序 递归中序遍历左子树,返问根,递归中序遍历右子树
后序 递归后序遍历左子树,递归后序遍历右子树,返问根
"""
def preorder(tree): # 前序
if tree:
print tree.getRootVal()
preorder(tree.getLeftChild())
preorder(tree.getRightChild())
def postorder(tree): # 后序
if tree:
postorder(tree.getLeftChild())
postorder(tree.getRightChild())
print tree.getRootVal()
u"""
遍历求值
"""
def postordereval(tree):
opers = {'+':operator.add, '-':operator.sub, '*':operator.mul, '/':operator.truediv}
res1 = None
res2 = None
if tree:
res1 = postordereval(tree.getLeftChild())
res2 = postordereval(tree.getRightChild())
if res1 and res2:
return opers[tree.getRootVal()](res1, res2)
else:
return tree.getRootVal()
def inorder(tree): # 中序
if tree:
inorder(tree.getLeftChild())
print tree.getRootVal()
inorder(tree.getRightChild())
u"""
打印原始分析树
"""
def printexp(tree):
s = ''
if tree:
s = '(' + printexp(tree.getLeftChild())
s = s + tree.getRootVal()
s = printexp(tree.getRightChild()) + ')'
return s
u"""
二叉堆 最小堆
子节点的一定小于父节点,根节点最小
每层的最大节点个数为2^h,所以子节点一定是父节点的2n 2n+1
"""
class BinHeap(object):
def __init__(self):
self.heapList = [0]
self.currentSize = 0
def insert(self, i): # O(log2^n)
self.heapList.append(i)
self.currentSize += 1
i = self.currentSize
j = i // 2 # 找到父节点的位置
while j > 0: # 对比交换
if self.heapList[i] < self.heapList[j]:
self.heapList[i], self.heapList[j] = self.heapList[j], self.heapList[i]
i = j
j = j // 2
def delMin(self): # O(log2^n)
minVal = self.heapList[1]
self.heapList[1] = self.heapList[-1]
self.currentSize -= 1
self.heapList.pop() # 删除最小,把最大的移动到根
self._percDown(1) # 遍历比较,把合适的移动到根
return minVal
def _minChild(self, i):
if (i * 2) + 1 > self.currentSize:
return i * 2
elif self.heapList[i*2] > self.heapList[i*2+1]:
return i * 2 + 1
else:
return i * 2
def _percDown(self, i):
while (i * 2) <= self.currentSize:
mc = self._minChild(i)
if self.heapList[i] > self.heapList[mc]:
self.heapList[i], self.heapList[mc] = self.heapList[mc], self.heapList[i]
i = mc
def buildHeap(self, alist): # O(n)
self.currentSize = len(alist)
self.heapList = [0] + alist[:]
i = self.currentSize // 2
while i > 0:
self._percDown(i)
i -= 1
def isEmpty(self):
return True if self.currentSize == 0 else False
def size(self):
return self.currentSize
u"""
最大堆
"""
class MaxBinHeap(object):
def __init__(self):
self.heapList = [0]
self.currentSize = 0
def insert(self, i):
self.heapList.append(i)
self.currentSize += 1
i = self.currentSize
j = i // 2
while j > 0:
if self.heapList[i] > self.heapList[j]:
self.heapList[i], self.heapList[j] = self.heapList[j], self.heapList[i]
i = j
j = j // 2
def delMax(self):
maxVal = self.heapList[1]
self.heapList[1] = self.heapList[-1]
self.currentSize -= 1
self.heapList.pop()
self._percUp(1)
return maxVal
def _maxChild(self, i):
if (i * 2) + 1 > self.currentSize:
return i * 2
elif self.heapList[i*2] < self.heapList[i*2+1]:
return i * 2 + 1
else:
return i * 2
def _percUp(self, i):
while (i * 2) <= self.currentSize:
mc = self._maxChild(i)
if self.heapList[i] < self.heapList[mc]:
self.heapList[i], self.heapList[mc] = self.heapList[mc], self.heapList[i]
i = mc
def buildHeap(self, alist):
self.currentSize = len(alist)
self.heapList = [0] + alist[:]
i = self.currentSize // 2
while i > 0:
self._percUp(i)
i -= 1
def isEmpty(self):
return True if self.currentSize == 0 else False
def size(self):
return self.currentSize
u"""
堆排序
构建最大堆
把根交换到最后
递归n-1构建最大堆,交换 O(nlogn)
"""
def heap_sort(lst):
def sift_down(start, end):
root = start
while 1:
child = 2 * root + 1
if child > end:
break
if child + 1 < end and lst[child] < lst[child+1]:
child = child + 1
if lst[root] < lst[child]:
lst[root], lst[child] = lst[child], lst[root]
root = child
else:
break
i = len(lst) // 2 # 构建最大堆
while i >= 0:
sift_down(i, len(lst) - 1)
i -= 1
for end in range(len(lst) - 1, 0, -1):
lst[0], lst[end] = lst[end], lst[0] # 交换,重复构建
sift_down(0, end-1)
u"""
二叉搜索树
节点的左子节点必须小于节点的key
节点的右子节点必须大于节点的key
get put delete 都是遍历高度 O(log2^n)
如果输入是有序的,那么就是最坏的结果O(n), 因为只会遍历左边或者右边
"""
class BinarySearchTree(object):
def __init__(self):
self.root = None
self.size = 0
def length(self):
return self.size
def __len__(self):
return self.size
def __iter__(self):
return self.root.__iter__()
def put(self, key, val): # 复杂度在于树的高度 O(log2^n)
if self.root is None:
self.root = TreeNode(key, val)
else:
self._put(key, val, self.root)
self.size += 1
def _put(self, key, val, current):
if key > current.key:
if current.hasRightChild():
self._put(key, val, current.right)
else:
current.right = TreeNode(key, val, parent=current)
elif key < current.key:
if current.hasLeftChild():
self._put(key, val, current.left)
else:
current.left = TreeNode(key, val, parent=current)
else:
current.playload = val # key 相同时 直接替换值
def __setitem__(self, k, v):
self.put(k, v)
def get(self, key): # O(log2^n)
if self.root:
node = self._get(key, self.root)
if node:
return node.playload
return None
def _get(self, key, current):
if not current:
return None
elif key > current.key:
return self._get(key, current.right)
elif key < current.key:
return self._get(key, current.left)
else:
return current
def __getitem__(self, k):
return self.get(k)
def __contains__(self, k):
return bool(self._get(k, self.root))
def __delitem__(self, k):
self.delete(k)
def delete(self, key): # 虽然很复杂, 但是实际上向下遍历找到合适的节点, 所有最大还是高度 O(log2^n)
if self.size > 1:
node = self._get(key, self.root)
if node:
self.removeNode(node)
self.size -= 1
else:
raise KeyError(key)
elif self.size == 1 and self.root.key == key:
self.root = None
self.size -= 1
else:
raise KeyError(key)
def removeNode(self, current):
if current.isLeaf(): # 如果是叶子节点,直接删除
if current.isLeftChild():
current.parent.left = None
else:
current.parent.right = None
elif current.hasBothChildren(): # 有2个子节点
succ = current.findSuccessor()
succ.spliceOut()
current.key = succ.key
current.playload = succ.key
else: # 有1个子节点
if current.hasLeftChild(): # 有左节点
if current.isLeftChild():
current.parent.left = current.left
current.left.parent = current.parent
elif current.isRightChild():
current.parent.right = current.left
current.left.parent = current.parent
else: # 根节点,直接使用子节点的数据替换自己
current.replaceNodeData(current.left.key, current.left.playload,
current.left.left, current.left.right)
else:
if current.isLeftChild():
current.parent.left = current.right
current.right.parent = current.parent
elif current.isRightChild():
current.parent.right = current.right
current.right.parent = current.parent
else: # 根节点
current.replaceNodeData(current.right.key, current.right.playload,
current.right.left, current.right.right)
class TreeNode(object):
def __init__(self, key, val, left=None, right=None, parent=None):
self.key = key
self.playload = val
self.left = left
self.right = right
self.parent = parent
self.balanceFactor = 0
def hasLeftChild(self):
return self.left
def hasRightChild(self):
return self.right
def isLeftChild(self):
return self.parent and self.parent.hasLeftChild() == self
def isRightChild(self):
return self.parent and self.parent.hasRightChild() == self
def isRoot(self):
return not self.parent
def isLeaf(self):
return not (self.left and self.right)
def hasAnyChildren(self):
return self.left or self.right
def hasBothChildren(self):
return self.left and self.right
def replaceNodeData(self, key, value, lc, rc):
self.key = key
self.playload = value
self.left = lc
self.right = rc
if self.hasLeftChild():
self.left.parent = self
if self.hasRightChild():
self.right.parent = self
def findSuccessor(self):
succ = None
if self.hasRightChild():
return self.right.findMin()
else:
if self.parent:
if self.isLeftChild():
succ = self.parent
else: # 如果没有右节点, 且自己是右节点, 父节点小于自己, 遍历向上找
self.parent.right = None # 可以肯定的是节点 右边节点一定会大于节点本身
succ = self.parent.findSuccessor()
self.parent.right = self
return succ
def findMin(self):
current = self
while current.hasLeftChild():
current = current.left
return current
def spliceOut(self):
if self.isLeaf():
if self.isLeftChild():
self.parent.left = None
else:
self.parent.right = None
elif self.hasLeftChild():
if self.isLeftChild():
self.parent.left = self.left
else:
self.parent.right = self.left
self.left.parent = self.parent
else:
if self.isLeftChild():
self.parent.left = self.right
else:
self.parent.right = self.right
self.right.parent = self.parent
def __iter__(self):
if self:
if self.hasLeftChild():
for ele in self.left:
yield ele
yield self.key
if self.hasRightChild():
for ele in self.right:
yield ele
u"""
平衡二叉树
AVL树 自动保持平衡的树
平衡 根的左子树与右子树的高度差不超过1
"""
class AvlTree(BinarySearchTree):
def _put(self, key, val, current):
if key > current.key:
if current.hasRightChild():
self._put(key, val, current.right)
else:
current.right = TreeNode(key, val, parent=current)
self.updateBalance(current.right)
elif key < current.key:
if current.hasLeftChild():
self._put(key, val, current.left)
else:
current.left = TreeNode(key, val, parent=current)
self.updateBalance(current.left)
else:
current.playload = val
def updateBalance(self, node):
u"""
1. 递归调用到树的根,直到找到平衡因子大于1的节点
2. 父节点的平衡因子已调整为零。你应该说服自己,一旦一个子树的平衡因子为零,那么它的祖先节点的平衡不会改变。
"""
if node.balanceFactor > 1 or node.balanceFactor < -1:
self.rebalance(node) # 通过旋转可以保证节点平衡因子为0
return
if node.parent != None:
if node.isLeftChild():
node.parent.balanceFactor += 1
elif node.isRightChild():
node.parent.balanceFactor -= 1
if node.parent.balanceFactor != 0:
self.updateBalance(node.parent)
def rebalance(self, node):
if node.balanceFactor < 0:
if node.right.balanceFactor > 0:
self.rotateRight(node.right)
self.rotateLeft(node)
elif node.balanceFactor > 0:
if node.left.balanceFactor < 0:
self.rotateLeft(node.left)
self.rotateRight(node)
def rotateLeft(self, rotRoot): # 左旋转
u"""
1. 提升右孩子(B)成为子树的根。
2. 将旧根(A)移动为新根的左子节点。
3. 如果新根(B)已经有一个左孩子,那么使它成为新左孩子(A)的右孩子。
注意:由于新根(B)是A的右孩子,A 的右孩子在这一点上保证为空。
这允许我们添加一个新的节点作为右孩子,不需进一步的考虑。
"""
newRoot = rotRoot.right # 右节点为新根
rotRoot.right = newRoot.left # 新根的左节点称为旧根的右节点
if newRoot.hasLeftChild():
newRoot.left.parent = rotRoot
newRoot.parent = rotRoot.parent
if rotRoot.isRoot():
self.root = newRoot
else:
if rotRoot.isLeftChild():
rotRoot.parent.left = newRoot
elif rotRoot.isRightChild():
rotRoot.parent.right = newRoot
newRoot.left = rotRoot
rotRoot.parent = newRoot
rotRoot.balanceFactor = rotRoot.balanceFactor + 1 - min(newRoot.balanceFactor, 0)
newRoot.balanceFactor = newRoot.balanceFactor + 1 + max(rotRoot.balanceFactor, 0)
def rotateRight(self, rotRoot): # 右旋转
newRoot = rotRoot.left
rotRoot.left = newRoot.right
if newRoot.hasRightChild():
newRoot.right.parent = rotRoot
newRoot.parent = rotRoot.parent
if rotRoot.isRoot():
self.root = newRoot
else:
if rotRoot.isLeftChild():
rotRoot.parent.left = newRoot
elif rotRoot.isRightChild():
rotRoot.parent.right = newRoot
newRoot.right = rotRoot
rotRoot.parent = newRoot
rotRoot.balanceFactor = rotRoot.balanceFactor + 1 - min(newRoot.balanceFactor, 0)
newRoot.balanceFactor = newRoot.balanceFactor + 1 + max(rotRoot.balanceFactor, 0)
u"""
图
顶点 同树中的节点
边 连接2个顶点,可以是多个方向的,如果图中边都只有一个方法,则称为有向图
权重 边可以被加权,表示顶点到顶点过去的成本
路径 由边连接的顶点的序列, 未加权的路径长度n-1, 加权的为路径所有边权重的和
循环 有向图的循环是同一顶点为起点和终点的路径, 没有循环的图称为非循环图形, 没有循环的有向图称为无环图 DAG
图 是由顶点集合与边集合组成的 G = (V, E) G 图 V 顶点集合 E 边的集合
"""
u"""
邻接矩阵
行与列存储了图的顶点,行与列的单元格存了边的加权值,如果2个顶点间的单元格值,则2个顶点相邻
由于存在很多的空单元格,大部分情况下矩阵是稀疏的,但是对于边很多的情况下,领接矩阵很适合
领接表
适用于边很稀疏的情况
对应于列的顶点列表,对每个顶点维护一个边的列表,保存了顶点与权重
空间更高效,并且保存了顶点直接连接的其它顶点
"""
import sys
class Vertex:
def __init__(self,num):
self.id = num
self.connectedTo = {}
self.color = 'white' # 是否已访问 white 未访问 gray 正在访问 black 已访问
self.dist = sys.maxsize
self.pred = None # 上级顶点
self.disc = 0 # 距离开始顶点的距离或者顶点标识为 gray 的时间
self.fin = 0 # 顶点标识为 black 的时间
def addNeighbor(self,nbr,weight=0):
self.connectedTo[nbr] = weight
def setColor(self,color):
self.color = color
def setDistance(self,d):
self.dist = d
def setPred(self,p):
self.pred = p
def setDiscovery(self,dtime):
self.disc = dtime
def setFinish(self,ftime):
self.fin = ftime
def getFinish(self):
return self.fin
def getDiscovery(self):
return self.disc
def getPred(self):
return self.pred
def getDistance(self):
return self.dist
def getColor(self):
return self.color
def getConnections(self):
return self.connectedTo.keys()
def getWeight(self,nbr):
return self.connectedTo[nbr]
def getId(self):
return self.id
class Graph(object):
def __init__(self):
self.vertList = {}
self.numVertices = 0
def addVertex(self, key):
ver = Vertex(key)
self.numVertices += 1
self.vertList[key] = ver
return ver
def getVertex(self, n):
if n in self.vertList:
return self.vertList[n]
return None
def __contains__(self, n):
return n in self.vertList
def addEdge(self, f, t, cost=0):
if f not in self.vertList:
self.addVertex(f)
if t not in self.vertList:
self.addVertex(t)
self.vertList[f].addNeighbor(self.vertList[t], cost)
def getVertices(self):
return self.vertList.keys()
def __iter__(self):
return iter(self.vertList.values())
u"""
构建字梯图
"""
def buildGraph(words): # O(n^2)
d = {}
g = Graph()
for word in words:
for i in range(len(word)):
bucket = word[:i] + '_' + word[i+1:]
if bucket in d:
d[bucket].append(word)
else:
d[bucket] = [word]
for bucket in d.keys():
for word1 in d[bucket]:
for word2 in d[bucket]:
if word1 != word2:
g.addEdge(word1, word2)
return g
u"""
广度优先 BFS
"""
def bfs(start): # O(V) 每个顶点只会访问一次
start.setDistance(0)
start.setPred(None)
vertQueue = Queue()
vertQueue.enqueue(start)
while (vertQueue.size() > 0):
currentVert = vertQueue.dequeue()
for nbr in currentVert.getConnections():
if (nbr.getColor() == 'white'):
nbr.setColor('gray')
nbr.setDistance(currentVert.getDistance() + 1)
nbr.setPred(currentVert)
vertQueue.enqueue(nbr)
currentVert.setColor('black')
def traverse(y): # 反向追踪找到路径
x = y
while x.getPred():
print x.getId()
x = x.getPred()
u"""
骑士之旅
棋盘上的每个单元格都需要访问到
"""
def knightGraph(bdSize):
graph = Graph()
for row in range(bdSize):
for col in range(bdSize):
nodeId = posToNodeId(row, col, bdSize)
newPositions = genLegalMoves(row, col, bdSize)
for e in newPositions:
nid = posToNodeId(e[0], e[1], bdSize)
graph.addEdge(nodeId, nid)
return graph
def posToNodeId(row, column, board_size):
return (row * board_size) + column
def genLegalMoves(x, y, bdSize):
newMoves = []
moveOffsets = [(-1,-2),(-1,2),(-2,-1),(-2,1),
( 1,-2),( 1,2),( 2,-1),( 2,1)]
for i in moveOffsets:
newX = x + i[0]
newY = y + i[1]
if legalCoord(newX, bdSize) and \
legalCoord(newY, bdSize):
newMoves.append((newX, newY))
return newMoves
def legalCoord(x, bdSize):
if x >= 0 and x < bdSize:
return True
return False
u"""
深度优先 DFS
"""
def knightTour(n, path, u, limit):
u"""
n: 搜索树中当前深度
path: 搜索路径节点列表
u: 希望找到的顶点
limit: 路径中的节点数,最大的深度
"""
u.setColor('gray')
path.append(u)
if n < limit:
# nbrList = list(u.getConnections())
nbrList = orderByAvail(u)
i = 0
done = False
while i < len(nbrList) and not done:
if nbrList[i].getColor() == 'white':
done = knightTour(n+1, path, nbrList[i], limit)
i = i + 1
if not done: # prepare to backtrack
path.pop()
u.setColor('white') # 回溯,遍历下一个邻居
else:
done = True
return done
u"""
启发式
倾向与先遍历到有较少的可选的节点,这样骑士会先覆盖棋盘周边的方格,然后向中间,
避免在中间路径过多的选择, 由中间向周边发散会产生更多的短路径
"""
def orderByAvail(n):
resList = []
for v in n.getConnections():
if v.getColor() == 'white':
c = 0
for w in v.getConnections():
if w.getColor() == 'white':
c = c + 1
resList.append((c,v))
resList.sort(key=lambda x: x[0])
return [y[1] for y in resList]
class DFSGraph(Graph):
def __init__(self):
super(DFSGraph, self).__init__()
self.time = 0
def dfs(self):
for aVertex in self:
aVertex.setColor('white')
aVertex.setPred(-1)
for aVertex in self:
if aVertex.getColor() == 'white':
self.dfsvisit(aVertex)
def dfsvisit(self, startVertex): # O(V + E) 覆盖每个顶点,并且每个边都会过一次
u"""
遍历过后的所有其它节点都能找到到开始节点的路径,构成深度优先森林
"""
startVertex.setColor('gray')
self.time += 1
startVertex.setDiscovery(self.time) # 开始遍历的时间
for nextVertex in startVertex.getConnections():
if nextVertex.getColor() == 'white':
nextVertex.setPred(startVertex)
self.dfsvisit(nextVertex)
startVertex.setColor('black')
self.time += 1
startVertex.setFinish(self.time) # 遍历完成的时间
u"""
拓扑排序
有向无环图
1. 对于某些图 g 调用 dfs(g)。我们想要调用深度优先搜索的主要原因是计算每个顶点的完成时间。
2. 以完成时间的递减顺序将顶点存储在列表中。
3. 返回有序列表作为拓扑排序的结果。得到正确的完成步骤
"""
u"""
强联通分量
C 是 G 的子集, 并且 C 的每个顶点都是互通的(强联通)
1. 调用dfs计算G的每个顶点的完成时间
2. 计算G^T 复制G,并翻转每个边的方向
3. 为G^T调用dfs, 但在 DFS 的主循环中,以完成时间的递减顺序探查每个顶点。
4. 在步骤 3 中计算的森林中的每个树是强连通分量。输出森林中每个树中每个顶点的顶点标识组件。
"""
def buildSCC():
g = DFSGraph()
g.addEdge('A', 'B')
g.addEdge('B', 'C')
g.addEdge('C', 'F')
g.addEdge('F', 'H')
g.addEdge('H', 'I')
g.addEdge('I', 'F')
g.addEdge('B', 'E')
g.addEdge('E', 'D')
g.addEdge('D', 'G')
g.addEdge('G', 'E')
g.addEdge('E', 'A')
g.addEdge('D', 'B')
g.dfs()
return g
def buildGT(g):
newG = DFSGraph()
for vert in g: # 构建G^T
for nbr in vert.connectedTo.keys():
newG.addEdge(nbr.getId(), vert.getId())
verts = sorted(newG.vertList.values(), key=lambda vert: g.getVertex(vert.getId()).getFinish(), reverse=True)
for aVertex in verts:
aVertex.setColor('white')
aVertex.setPred(-1)
for aVertex in verts:
if aVertex.getColor() == 'white':
newG.dfsvisit(aVertex) # 构建的子树即为强连通分量
return newG
u"""
最短路径问题
针对加权边的图
"""
class PriorityQueue(object):
u"""
优先级队列
"""
def __init__(self):
self.heapArray = [(0,0)]
self.currentSize = 0
def buildHeap(self,alist):
self.currentSize = len(alist)
self.heapArray = [(0,0)]
for i in alist:
self.heapArray.append(i)
i = len(alist) // 2
while (i > 0):
self.percDown(i)
i = i - 1
def percDown(self,i):
while (i * 2) <= self.currentSize:
mc = self.minChild(i)
if self.heapArray[i][0] > self.heapArray[mc][0]:
tmp = self.heapArray[i]
self.heapArray[i] = self.heapArray[mc]
self.heapArray[mc] = tmp
i = mc
def minChild(self,i):
if i*2 > self.currentSize:
return -1
else:
if i*2 + 1 > self.currentSize:
return i*2
else:
if self.heapArray[i*2][0] < self.heapArray[i*2+1][0]:
return i*2
else:
return i*2+1
def percUp(self,i):
while i // 2 > 0:
if self.heapArray[i][0] < self.heapArray[i//2][0]:
tmp = self.heapArray[i//2]
self.heapArray[i//2] = self.heapArray[i]
self.heapArray[i] = tmp
i = i//2
def add(self,k):
self.heapArray.append(k)
self.currentSize = self.currentSize + 1
self.percUp(self.currentSize)
def delMin(self):
retval = self.heapArray[1][1]
self.heapArray[1] = self.heapArray[self.currentSize]
self.currentSize = self.currentSize - 1
self.heapArray.pop()
self.percDown(1)
return retval
def isEmpty(self):
if self.currentSize == 0:
return True
else:
return False
def decreaseKey(self,val,amt):
# this is a little wierd, but we need to find the heap thing to decrease by
# looking at its value
done = False
i = 1
myKey = 0
while not done and i <= self.currentSize:
if self.heapArray[i][1] == val:
done = True
myKey = i
else:
i = i + 1
if myKey > 0:
self.heapArray[myKey] = (amt,self.heapArray[myKey][1])
self.percUp(myKey)
def __contains__(self,vtx):
for pair in self.heapArray:
if pair[1] == vtx:
return True
return False
u"""
dijkstra算法, 类似于bfs, 只是使用优先级队列, 按权重最小排列
最终算出其它顶点与开始顶点间最短距离
只有权重为正才能正常运行,否则会死循环
"""
def dijkstra(aGraph, start): # O((V + E)log^V )
pq = PriorityQueue()
start.setDistance(0)
pq.buildHeap([(v.getDistance(), v) for v in aGraph]) # 所有的都放进去了
while not pq.isEmpty():
currentVert = pq.delMin()
for nextVert in currentVert.getConnections():
newDist = currentVert.getDistance() \
+ currentVert.getWeight(nextVert)
if newDist < nextVert.getDistance():
nextVert.setDistance(newDist)
nextVert.setPred(currentVert)
pq.decreaseKey(nextVert, newDist) # 移动到队列的前面
u"""
最小权重生成树
Prim生成树算法
使用最小权重的边重写顶点的路径, 贪婪算法
"""
def prim(G, start):
pq = PriorityQueue()
for v in G:
v.setDistance(sys.maxint)
v.setPred(None)
start.setDistance(0)
pq.buildHeap([(v.getDistance(), v) for v in G])
while not pq.isEmpty():
currentVert = pq.delMin()
for nextVert in currentVert.getConnections():
newCost = currentVert.getWeight(nextVert) + currentVert.getDistance()
if nextVert in pq and newCost < nextVert.getDistance():
nextVert.setPred(currentVert)
nextVert.setDistance(newCost)
pq.decreaseKey(nextVert, newCost)